AIML CAPSTONE PROJECT
NATURAL LANGUAGE PROCESSING - CHATBOT INTERFACE
Title : Industrial Safety Analysis Using Natural Language Processing
Group Members:
1. Abhijith Warrier M
2. Kamalkar Sripad
3. Prasanjit Ghosh
4. Ranjithkumar S
5. Rishabh shukla
6. Nishant Bhardwaj
Milestone 1:
PROBLEM STATEMENT
DOMAIN: Industrial safety. NLP based Chatbot.
CONTEXT: The database comes from one of the biggest industry in Brazil and in the world. It is an urgent need for industries/companies around the globe to understand why employees still suffer some injuries/accidents in plants. Sometimes they also die in such environment.
DATA DESCRIPTION:
This The database is basically records of accidents from 12 different plants in 03 different countries which every line in the data is an occurrence of an accident. Columns description:
‣ Data: timestamp or time/date information
‣ Countries: which country the accident occurred (anonymised)
‣ Local: the city where the manufacturing plant is located (anonymised)
‣ Industry sector: which sector the plant belongs to
‣ Accident level: from I to VI, it registers how severe was the accident (I means not severe but VI means very severe)
‣ Potential Accident Level: Depending on the Accident Level, the database also registers how severe the accident could have been (due to other factors involved in the accident)
‣ Genre: if the person is male of female
‣ Employee or Third Party: if the injured person is an employee or a third party
‣ Critical Risk: some description of the risk involved in the accident
‣ Description: Detailed description of how the accident happened.
PROJECT OBJECTIVE: Design a ML/DL based chatbot utility which can help the professionals to highlight the safety risk as per the incident description.
Importing Necessary Libraries
#importing python libraries
#importing necessary python libraries
import os
import re
import nltk
import spacy
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import warnings
warnings.filterwarnings("ignore")
#importing bokeh for Visualization
import holoviews as hv
from holoviews import opts
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure, show
from bokeh.transform import factor_cmap
from bokeh.palettes import Category20c
from bokeh.plotting import figure, show
from bokeh.transform import cumsum
import time
from sklearn.model_selection import KFold
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
#creating a lambda function as f for calculating the value for the bi varient plots
f = lambda x : np.round(x/x.sum() * 100)
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
#import ML classifier and metrics from sklearn
#importing Classification models and Ensemble models
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import RidgeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import ExtraTreesClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score,f1_score,recall_score,precision_score,roc_auc_score
from sklearn.metrics import classification_report,confusion_matrix
# Importing TensorFlow libraries
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Model
from keras.utils import to_categorical
from keras.optimizers import SGD, Adam
from tensorflow.keras import layers, models, Sequential
from tensorflow.keras.layers import LSTM, Bidirectional, Embedding
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.layers import Dense, Dropout, Input, GlobalMaxPool1D
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, f1_score, recall_score
from sklearn.feature_extraction.text import TfidfVectorizer
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from keras.models import model_from_json
# Importing stopwords to remove them from the corpus
from nltk.corpus import stopwords
from tensorflow.keras.models import load_model
print(tf.__version__)
# Helps to pad the sequences into the same length
from tensorflow.keras.preprocessing.sequence import pad_sequences
# Layers that are used to implement the LSTM model
from tensorflow.keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
#import train test split model from sklearn model selection to create train and test dataset
from sklearn.model_selection import train_test_split
#importing countvectorizer from sklearn feature extracting for converting text data into numberic
from sklearn.feature_extraction.text import CountVectorizer
#import TF-IDF Vectorizer from sklearn feature extraction
from sklearn.feature_extraction.text import TfidfVectorizer
#importing label encode to convert the labels in the target columns into a number
from sklearn.preprocessing import LabelEncoder
#importing the stopwords helps to remove the stopwords from the corpus
from nltk.corpus import stopwords
from wordcloud import WordCloud
2.12.0
1.1 Importing the Data
#importing the given csv file using the read_csv function from pandas
#using try and except method to display if we encounter a error while importing
try:
ind_data = pd.read_csv('Data Set - industrial_safety_and_health_database_with_accidents_description.csv')
print("\033[1mReport: Dataset Imported\033[0m") #printing comment
except Exception as e:
print('\033[1mReport: Error Importing Data:\033[0m',e) #printing error
Report: Dataset Imported
1.2 Making Copy of the Dataset
#making copy of the data by droping the unnamed: 0 columns using drop function from pandas
data = ind_data.copy().drop('Unnamed: 0',axis=1)
data[:10]
| Data | Countries | Local | Industry Sector | Accident Level | Potential Accident Level | Genre | Employee or Third Party | Critical Risk | Description | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2016-01-01 00:00:00 | Country_01 | Local_01 | Mining | I | IV | Male | Third Party | Pressed | While removing the drill rod of the Jumbo 08 f... |
| 1 | 2016-01-02 00:00:00 | Country_02 | Local_02 | Mining | I | IV | Male | Employee | Pressurized Systems | During the activation of a sodium sulphide pum... |
| 2 | 2016-01-06 00:00:00 | Country_01 | Local_03 | Mining | I | III | Male | Third Party (Remote) | Manual Tools | In the sub-station MILPO located at level +170... |
| 3 | 2016-01-08 00:00:00 | Country_01 | Local_04 | Mining | I | I | Male | Third Party | Others | Being 9:45 am. approximately in the Nv. 1880 C... |
| 4 | 2016-01-10 00:00:00 | Country_01 | Local_04 | Mining | IV | IV | Male | Third Party | Others | Approximately at 11:45 a.m. in circumstances t... |
| 5 | 2016-01-12 00:00:00 | Country_02 | Local_05 | Metals | I | III | Male | Third Party (Remote) | Pressurized Systems | During the unloading operation of the ustulado... |
| 6 | 2016-01-16 00:00:00 | Country_02 | Local_05 | Metals | I | III | Male | Employee | Fall prevention (same level) | The collaborator reports that he was on street... |
| 7 | 2016-01-17 00:00:00 | Country_01 | Local_04 | Mining | I | III | Male | Third Party | Pressed | At approximately 04:50 p.m., when the mechanic... |
| 8 | 2016-01-19 00:00:00 | Country_02 | Local_02 | Mining | I | IV | Male | Third Party (Remote) | Others | Employee was sitting in the resting area at le... |
| 9 | 2016-01-26 00:00:00 | Country_01 | Local_06 | Metals | I | II | Male | Third Party | Chemical substances | At the moment the forklift operator went to ma... |
1.3 Shape of the Imported Dataset
#shape Report
shape = data.shape
print('\033[1mReport:\033[0m\033[1m The DataFrame as\033[94m {}\033[0m\033[1m Rows and\033[0m\033[91m {} \033[0m\033[1mColumns\033[0m'.format(shape[0],shape[1]))
Report: The DataFrame as 425 Rows and 10 Columns
1.4 Head and Tail of the Dataset
# head
head = data.head()
print('\033[1mReport: The Top 5 Rows in the DataFrame:\033[0m')
display(head)
#tail
tail = data.tail()
print('\033[1m\nReport: The Bottom 5 Rows in the DataFrame:\n\033[0m')
display(tail)
Report: The Top 5 Rows in the DataFrame:
| Data | Countries | Local | Industry Sector | Accident Level | Potential Accident Level | Genre | Employee or Third Party | Critical Risk | Description | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2016-01-01 00:00:00 | Country_01 | Local_01 | Mining | I | IV | Male | Third Party | Pressed | While removing the drill rod of the Jumbo 08 f... |
| 1 | 2016-01-02 00:00:00 | Country_02 | Local_02 | Mining | I | IV | Male | Employee | Pressurized Systems | During the activation of a sodium sulphide pum... |
| 2 | 2016-01-06 00:00:00 | Country_01 | Local_03 | Mining | I | III | Male | Third Party (Remote) | Manual Tools | In the sub-station MILPO located at level +170... |
| 3 | 2016-01-08 00:00:00 | Country_01 | Local_04 | Mining | I | I | Male | Third Party | Others | Being 9:45 am. approximately in the Nv. 1880 C... |
| 4 | 2016-01-10 00:00:00 | Country_01 | Local_04 | Mining | IV | IV | Male | Third Party | Others | Approximately at 11:45 a.m. in circumstances t... |
Report: The Bottom 5 Rows in the DataFrame:
| Data | Countries | Local | Industry Sector | Accident Level | Potential Accident Level | Genre | Employee or Third Party | Critical Risk | Description | |
|---|---|---|---|---|---|---|---|---|---|---|
| 420 | 2017-07-04 00:00:00 | Country_01 | Local_04 | Mining | I | III | Male | Third Party | Others | Being approximately 5:00 a.m. approximately, w... |
| 421 | 2017-07-04 00:00:00 | Country_01 | Local_03 | Mining | I | II | Female | Employee | Others | The collaborator moved from the infrastructure... |
| 422 | 2017-07-05 00:00:00 | Country_02 | Local_09 | Metals | I | II | Male | Employee | Venomous Animals | During the environmental monitoring activity i... |
| 423 | 2017-07-06 00:00:00 | Country_02 | Local_05 | Metals | I | II | Male | Employee | Cut | The Employee performed the activity of strippi... |
| 424 | 2017-07-09 00:00:00 | Country_01 | Local_04 | Mining | I | II | Female | Third Party | Fall prevention (same level) | At 10:00 a.m., when the assistant cleaned the ... |
1.5 Overview of Dataset
#using info fucntion getting columns name, datatypes and null value counts in the dataset
print('\033[1m\nReport: Information about a DataFrame:\n\033[0m')
display(data.info(verbose=True,memory_usage=False,show_counts=True))
Report: Information about a DataFrame:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 425 entries, 0 to 424
Data columns (total 10 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Data 425 non-null object
1 Countries 425 non-null object
2 Local 425 non-null object
3 Industry Sector 425 non-null object
4 Accident Level 425 non-null object
5 Potential Accident Level 425 non-null object
6 Genre 425 non-null object
7 Employee or Third Party 425 non-null object
8 Critical Risk 425 non-null object
9 Description 425 non-null object
dtypes: object(10)
None
1.6 Unique Elements in the each columns of the Dataset
#Unique value Count
print('\033[1m\n\nReport: Unique value Count and Value in the Dataset Columns-Wise:\n\033[0m')
#unique Values
for col in data.columns:
val = data[col].nunique()
perc = round((val/data.shape[0])*100,2)
print(f'\t\033[1mUnique values of "{col}" column as {val}({perc})%\n\033[0m');
#filtering the the columns having less unique values
if perc <= 50.00:
print('\t',data[col].unique()) #printing the unique values in the columns
print('\n')
Report: Unique value Count and Value in the Dataset Columns-Wise: Unique values of "Data" column as 287(67.53)% Unique values of "Countries" column as 3(0.71)% ['Country_01' 'Country_02' 'Country_03'] Unique values of "Local" column as 12(2.82)% ['Local_01' 'Local_02' 'Local_03' 'Local_04' 'Local_05' 'Local_06' 'Local_07' 'Local_08' 'Local_10' 'Local_09' 'Local_11' 'Local_12'] Unique values of "Industry Sector" column as 3(0.71)% ['Mining' 'Metals' 'Others'] Unique values of "Accident Level" column as 5(1.18)% ['I' 'IV' 'III' 'II' 'V'] Unique values of "Potential Accident Level" column as 6(1.41)% ['IV' 'III' 'I' 'II' 'V' 'VI'] Unique values of "Genre" column as 2(0.47)% ['Male' 'Female'] Unique values of "Employee or Third Party" column as 3(0.71)% ['Third Party' 'Employee' 'Third Party (Remote)'] Unique values of "Critical Risk" column as 33(7.76)% ['Pressed' 'Pressurized Systems' 'Manual Tools' 'Others' 'Fall prevention (same level)' 'Chemical substances' 'Liquid Metal' 'Electrical installation' 'Confined space' 'Pressurized Systems / Chemical Substances' 'Blocking and isolation of energies' 'Suspended Loads' 'Poll' 'Cut' 'Fall' 'Bees' 'Fall prevention' '\nNot applicable' 'Traffic' 'Projection' 'Venomous Animals' 'Plates' 'Projection/Burning' 'remains of choco' 'Vehicles and Mobile Equipment' 'Projection/Choco' 'Machine Protection' 'Power lock' 'Burn' 'Projection/Manual Tools' 'Individual protection equipment' 'Electrical Shock' 'Projection of fragments'] Unique values of "Description" column as 411(96.71)%
2.1 Renaming Columns
#renaming the Data to Date, Countries to country and gener to Gender, Employee or third party to Employee Type
data.rename(columns = {'Data':'Date','Countries':'Country','Genre':'Gender','Employee or Third Party':'Employee Type'}, inplace = True)
data[:5]
| Date | Country | Local | Industry Sector | Accident Level | Potential Accident Level | Gender | Employee Type | Critical Risk | Description | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2016-01-01 00:00:00 | Country_01 | Local_01 | Mining | I | IV | Male | Third Party | Pressed | While removing the drill rod of the Jumbo 08 f... |
| 1 | 2016-01-02 00:00:00 | Country_02 | Local_02 | Mining | I | IV | Male | Employee | Pressurized Systems | During the activation of a sodium sulphide pum... |
| 2 | 2016-01-06 00:00:00 | Country_01 | Local_03 | Mining | I | III | Male | Third Party (Remote) | Manual Tools | In the sub-station MILPO located at level +170... |
| 3 | 2016-01-08 00:00:00 | Country_01 | Local_04 | Mining | I | I | Male | Third Party | Others | Being 9:45 am. approximately in the Nv. 1880 C... |
| 4 | 2016-01-10 00:00:00 | Country_01 | Local_04 | Mining | IV | IV | Male | Third Party | Others | Approximately at 11:45 a.m. in circumstances t... |
2.2 Checking of any Null Values
#null Values
null_value = data.isnull().sum().sum() #taking null values count
if null_value == 0: #check for zero null value
print("\n\033[1mReport: No Null Values in the DataFrame\033[0m")
else:
print("\n\033[1mReport: The Dataset as {} Null Value:\033[0m".format(null_value))
print("\tAction: Filling the Null value by Mean")
data.fillna(df.mean(),inplace=True) #else replace null values by the mean value fro the columns
plt.title("Heatmap of Null Value")
sns.heatmap(data.isnull(), cbar=False) #if null value available show me the heat map
plt.show()
Report: No Null Values in the DataFrame
2.3 Checking of Duplicate Values
#Duplicate Values
dupli = data.duplicated().sum() #checking duplicate value using duplicated function
if dupli == 0:
print("\033[1m\nReport: No Duplicate Rows in the DataFrame\033[0m")
else:
#if duplicate found remove the duplicate rows
print("\033[1m\nReport: The Dataset as {} Duplicate Rows\033[0m".format(dupli))
print('\tAction: Dropping the Duplicate Rows')
data.drop_duplicates(inplace=True)
data = data.reset_index(drop=True)
print("\tShape after Droping the Duplicate Values: {} Rows {} Columns".format(data.shape[0],data.shape[1]))
Report: The Dataset as 7 Duplicate Rows
Action: Dropping the Duplicate Rows
Shape after Droping the Duplicate Values: 418 Rows 10 Columns
3.a Extracting Additional Information from 'Date' Column for EDA
#removing timestamp for the date columns using to_datetime function
data['Date'] = pd.to_datetime(data['Date'])
#Extracting the Date Columns into Year,Month,day of the date
data['Day'] = data.Date.apply(lambda x : x.day_name())
data['Month'] = data.Date.apply(lambda x : x.month)
data['Year'] = data.Date.apply(lambda x : x.year)
data[:5]
| Date | Country | Local | Industry Sector | Accident Level | Potential Accident Level | Gender | Employee Type | Critical Risk | Description | Day | Month | Year | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2016-01-01 | Country_01 | Local_01 | Mining | I | IV | Male | Third Party | Pressed | While removing the drill rod of the Jumbo 08 f... | Friday | 1 | 2016 |
| 1 | 2016-01-02 | Country_02 | Local_02 | Mining | I | IV | Male | Employee | Pressurized Systems | During the activation of a sodium sulphide pum... | Saturday | 1 | 2016 |
| 2 | 2016-01-06 | Country_01 | Local_03 | Mining | I | III | Male | Third Party (Remote) | Manual Tools | In the sub-station MILPO located at level +170... | Wednesday | 1 | 2016 |
| 3 | 2016-01-08 | Country_01 | Local_04 | Mining | I | I | Male | Third Party | Others | Being 9:45 am. approximately in the Nv. 1880 C... | Friday | 1 | 2016 |
| 4 | 2016-01-10 | Country_01 | Local_04 | Mining | IV | IV | Male | Third Party | Others | Approximately at 11:45 a.m. in circumstances t... | Sunday | 1 | 2016 |
As we know, this database comes from one of the biggest industry in Brazil which has four climatological seasos as below.
https://seasonsyear.com/Brazil
We can create seasonal variable based on month variable.
# function to create month variable into seasons
def month2seasons(x):
if x in [9, 10, 11]:
season = 'Spring'
elif x in [12, 1, 2]:
season = 'Summer'
elif x in [3, 4, 5]:
season = 'Autumn'
elif x in [6, 7, 8]:
season = 'Winter'
return season
data['Season'] = data['Month'].apply(month2seasons)
data.head(3)
| Date | Country | Local | Industry Sector | Accident Level | Potential Accident Level | Gender | Employee Type | Critical Risk | Description | Day | Month | Year | Season | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2016-01-01 | Country_01 | Local_01 | Mining | I | IV | Male | Third Party | Pressed | While removing the drill rod of the Jumbo 08 f... | Friday | 1 | 2016 | Summer |
| 1 | 2016-01-02 | Country_02 | Local_02 | Mining | I | IV | Male | Employee | Pressurized Systems | During the activation of a sodium sulphide pum... | Saturday | 1 | 2016 | Summer |
| 2 | 2016-01-06 | Country_01 | Local_03 | Mining | I | III | Male | Third Party (Remote) | Manual Tools | In the sub-station MILPO located at level +170... | Wednesday | 1 | 2016 | Summer |
3.1.a Percentage of Accidents occured by Country
#getting values of country in the dataset by value counts
value = data['Country'].value_counts()
#ploting a pie chart using plotly with title and color sequence
fig = px.pie(values = value, names=["Country_01","Country_02",'Country_03'],
title='Percentage of Incident over Countries',
color_discrete_sequence=px.colors.sequential.Teal_r,
width=800, height=700)
fig.show() #showing plt
3.1.b Distribustion of Accidents occured by Location
#creating count variable by the count of accidents in location with normalization parameter and converting into pecentage to get the distribustion
count = data['Local'].value_counts(normalize=True) * 100
hv.extension('bokeh') #intiazing the holoviews instance hv with bokeh extension
# Ploting Bar Graph from the percentage of value obtained
hv.Bars(count).opts(title="Distribustion of Accidents occured by Location", color='#D4EFDF', xlabel="Locations", ylabel="Percentage", yformatter='%d%%')\
.opts(opts.Bars(width=700, height=400,tools=['hover']))
3.1.c Distribustion of Accidents occured in Industry
#creating count variable by the count of accidents in Industry Sector with normalization parameter and converting into pecentage to get the distribustion
count = data['Industry Sector'].value_counts(normalize=True) * 100
#Ploting Bar Graph from the percentage of value obtained
hv.Bars(count).opts(title="Distribustion of Accidents occured in Industry", color="#D4EFDF", xlabel="Industry Sector", ylabel="Percentage", yformatter='%d%%')\
.opts(opts.Bars(width=400, height=300,tools=['hover']))
3.1.d Distribustion of Accidents Level and Potential Accident Level
#creating ac_level variable by the count of accidents Level with normalization parameter and converting into pecentage to get the distribustion
ac_level = data['Accident Level'].value_counts(normalize=True) * 100
#creating po_level variable by the count of Potential accidents Level with normalization parameter and converting into pecentage to get the distribustion
po_level = data['Potential Accident Level'].value_counts(normalize=True) * 100
#making count variable for plot by concat ac_level & po_level with
#sort fucntion to False not arrage them and remaing the Columns
count = pd.concat([ac_level,po_level],axis=1,sort=False).fillna(0).rename(columns={'Accident Level':'Accident', 'Potential Accident Level':'Potential'})
#using pd.melt function making the identifier variables for ploting
count = pd.melt(count.reset_index(), ['index']).rename(columns={'index':'Severity', 'variable':'Levels'})
#Ploting Bar Graph from the percentage of value obtained
hv.Bars(count, ['Severity', 'Levels'], 'value').opts(opts.Bars(title="Distribustion of Accidents Level and Potential Accident Level", width=700, height=300,tools=['hover'],\
xrotation=45, ylabel="Percentage", yformatter='%d%%'))
Accident Level label¶Accident Level - I count: 309 i.e. 74.0% Accident Level - II count: 40 i.e. 10.0% Accident Level - III count: 31 i.e. 7.0% Accident Level - IV count: 30 i.e. 7.0% Accident Level - V count: 8 i.e. 2.0%
Potential Accident Level - I count: 45 i.e. 11.0% Potential Accident Level - II count: 95 i.e. 23.0% Potential Accident Level - III count: 106 i.e. 25.0% Potential Accident Level - IV count: 141 i.e. 34.0% Potential Accident Level - V count: 30 i.e. 7.0% Potential Accident Level - VI count: 1 i.e. 0.0%
3.1.e Distribustion of Accidents Level by Gender
#creating value variable by the count of accidents Level with normalization parameter and converting into pecentage to get the distribustion
count = data['Gender'].value_counts(normalize=True) * 100
#Ploting Bar Graph from the percentage of value obtained
hv.Bars(count).opts(title="Distribustion of Gender", color="#D4EFDF", xlabel="Gender", ylabel="Percentage", yformatter='%d%%')\
.opts(opts.Bars(width=500, height=400,tools=['hover']))
3.1.f Distribustion of Accidents by Employee Type
#creating value variable by the count of Employee Type with normalization parameter and converting into pecentage to get the distribustion
count = data['Employee Type'].value_counts(normalize=True) * 100
#Ploting Bar Graph from the percentage of value obtained
hv.Bars(count).opts(title="Distribustion of Employee Type", color="#D4EFDF", xlabel="Employee Type", ylabel="Percentage", yformatter='%d%%')\
.opts(opts.Bars(width=500, height=400,tools=['hover']))
3.1.g Distribustion of Accidents by Critical Risk
#creating value variable by the count of Critical Risk with normalization parameter and converting into pecentage to get the distribustion
count = data['Critical Risk'].value_counts(normalize=True) * 100
#Ploting Bar Graph from the percentage of value obtained
hv.Bars(count[::-1]).opts(title="Distribustion of Critical Risk", color="#D4EFDF", xlabel="Critical Risk", ylabel="Percentage", xformatter='%d%%')\
.opts(opts.Bars(width=700, height=700,tools=['hover'],invert_axes=True))
Because most part of the Critical Risks are classified as 'Others', it is thought that there are too many risks to classify precisely.
And it is also thought that it takes so much time to analyze risks and reasons why the accidents occur.
3.1.h Distribustion of Accidents by year and Month
#creating year count variable by the count of Year with normalization parameter and converting into pecentage to get the distribustion
year_count = data['Year'].value_counts(normalize=True) * 100
#creating a bar plot of the year count and assign it to year
year = hv.Bars(year_count).opts(title="Distribustion of of Years", color="#D4EFDF", xlabel="Critical Risk", ylabel="Percentage", yformatter='%d%%')
#creating month count variable by the count of month with normalization parameter and converting into pecentage to get the distribustion
month_count = data['Month'].value_counts(normalize=True,sort=False) * 100
#creating a bar plot of the month count and assign it to month
month = hv.Bars(month_count).opts(title="Distribustion of Months", color="#D4EFDF", xlabel="Critical Risk", ylabel="Percentage", yformatter='%d%%')* hv.Curve(month_count).opts(color='red', line_width=3)
#Ploting Bar Graph by adding the year and month values
(year+month).opts(opts.Bars(width=400, height=300,tools=['hover'],show_grid=True, ylabel="Percentage", yformatter='%d%%')).cols(2)
3.1.i Distribustion of Accidents by Season
#creating count variable by the count of Season with normalization parameter and converting into pecentage to get the distribustion
count = data['Season'].value_counts(normalize=True) * 100
#Ploting Bar Graph from the percentage of value obtained
hv.Bars(count).opts(title="Distribustion of Season",color="#D4EFDF",xlabel="Seasons", ylabel="Percentage")\
.opts(opts.Bars(width=500, height=400,tools=['hover']))
3.2.a Distribution of industry sector different significantly in differ countries
#creating count variable by grouping the 'Country' over 'Industry Sector' getting the count by applying the lambda function f and upstacking for plot
count = data.groupby(['Industry Sector','Country'])['Industry Sector'].count().unstack().apply(f, axis=1)
#Ploting Bar Graph from the percentage of value obtained
hv.Bars(pd.melt(count.reset_index(), ['Industry Sector']), ['Industry Sector','Country'], 'value')\
.opts(opts.Bars(title="Industry type in the Countries", width=900, height=400,tools=['hover'],xrotation=0, ylabel="Percentage", yformatter='%d%%'))
3.2.b Distribution of employee type differ significantly in different genders
#creating count variable by grouping the 'gender' over 'Employee Type' getting the count by applying the lambda function f and upstacking for plot
count = data.groupby(['Gender','Employee Type'])['Employee Type'].count().unstack().apply(f, axis=1)
#Ploting Bar Graph from the percentage of value obtained
hv.Bars(pd.melt(count.reset_index(), ['Gender']), ['Gender','Employee Type'], 'value').opts(opts.Bars(title="Employee type by Gender Count", width=800, height=300,tools=['hover'],\
show_grid=True,xrotation=0, ylabel="Percentage", yformatter='%d%%'))
3.2.c Distribution of industry sector different significantly in differ Gender
#creating count variable by grouping the 'gender' over 'Industry Sector' getting the count by applying the lambda function f and upstacking for plot
count = data.groupby(['Gender','Industry Sector'])['Industry Sector'].count().unstack().apply(f, axis=1)
#Ploting Bar Graph from the percentage of value obtained
hv.Bars(pd.melt(count.reset_index(), ['Gender']), ['Gender','Industry Sector'], 'value').opts(opts.Bars(title="Industry Sector by Gender Count", width=800, height=300,tools=['hover'],\
show_grid=True,xrotation=0, ylabel="Percentage", yformatter='%d%%'))
3.2.d Distribution of industry sector different significantly in differ countries
#creating count variable by grouping the 'gender' over 'Accident level' getting the count by applying the lambda function f and upstacking for plot
ac_gen = data.groupby(['Gender','Accident Level'])['Accident Level'].count().unstack().apply(f, axis=1)
ac = hv.Bars(pd.melt(ac_gen.reset_index(), ['Gender']), ['Gender','Accident Level'], 'value').opts(opts.Bars(title="Accident Level by Gender Count"))
#creating count variable by grouping the 'gender' over 'potential Accident level' getting the count by applying the lambda function f and upstacking for plot
pot_ac_gen = data.groupby(['Gender','Potential Accident Level'])['Potential Accident Level'].count().unstack().apply(f, axis=1)
pot_ac = hv.Bars(pd.melt(pot_ac_gen.reset_index(), ['Gender']), ['Gender','Potential Accident Level'], 'value').opts(opts.Bars(title="Potential Accident Level by Gender Count"))
#Ploting Bar Graph from the percentage of value obtained
(ac + pot_ac).opts(opts.Bars(width=400, height=300,tools=['hover'],show_grid=True,xrotation=0, ylabel="Percentage", yformatter='%d%%'))
3.2.e Distribution of industry sector different significantly in differ countries
#creating count variable by grouping the 'Employee Type over 'Accident level' getting the count by applying the lambda function f and upstacking for plot
ac_em = data.groupby(['Employee Type','Accident Level'])['Accident Level'].count().unstack().apply(f, axis=1)
ac = hv.Bars(pd.melt(ac_em.reset_index(), ['Employee Type']), ['Employee Type','Accident Level'], 'value').opts(opts.Bars(title="Accident Level by Employee type Count"))
#creating count variable by grouping the 'Employee Type' over 'potential Accident level' getting the count by applying the lambda function f and upstacking for plot
pot_ac_em = data.groupby(['Employee Type','Potential Accident Level'])['Potential Accident Level'].count().unstack().apply(f, axis=1)
pot_ac = hv.Bars(pd.melt(pot_ac_em.reset_index(), ['Employee Type']), ['Employee Type','Potential Accident Level'], 'value').opts(opts.Bars(title="Potential Accident Level by Employee type Count"))
#Ploting Bar Graph from the percentage of value obtained
(ac + pot_ac).opts(opts.Bars(width=400, height=300,tools=['hover'],show_grid=True,xrotation=0, ylabel="Percentage", yformatter='%d%%',fontsize={'title':9}))
3.2.f Distribution of Accident Level and Potential Accident Level different significantly in differ Months
#creating count variable by grouping the 'Month over 'Accident level' getting the count by applying the lambda function f and upstacking for plot
ac_mo = data.groupby(['Month','Accident Level'])['Accident Level'].count().unstack().apply(f, axis=1).fillna(0)
ac = hv.Curve(ac_mo['I'], label='I') * hv.Curve(ac_mo['II'], label='II') * hv.Curve(ac_mo['III'], label='III') * hv.Curve(ac_mo['IV'], label='IV') * hv.Curve(ac_mo['V'], label='V')\
.opts(opts.Curve(title="Accident Level by Month Count"))
#creating count variable by grouping the 'Month' over 'potential Accident level' getting the count by applying the lambda function f and upstacking for plot
pot_ac_mo = data.groupby(['Month','Potential Accident Level'])['Potential Accident Level'].count().unstack().apply(f, axis=1).fillna(0)
pot_ac = hv.Curve(pot_ac_mo['I'], label='I') * hv.Curve(pot_ac_mo['II'], label='II') * hv.Curve(pot_ac_mo['III'], label='III') * hv.Curve(pot_ac_mo['IV'], label='IV')\
* hv.Curve(pot_ac_mo['V'], label='V') * hv.Curve(pot_ac_mo['VI'], label='VI').opts(opts.Curve(title="Potential Accident Level by Month Count"))
#Ploting Bar Graph from the percentage of value obtained
(ac+pot_ac).opts(opts.Curve(width=800, height=300,tools=['hover'],show_grid=True, ylabel="Percentage", yformatter='%d%%')).cols(1)
Distribution of industry sector differ significantly in each country. But let's check the proportion of metals, mining and others sector in Country_01 and is that difference is statistically significant?
Proportion of third party employees in each gender is equal.
Proportion of own employees in each gender is not equal. But let's check is that difference is statistically significant?
Proportion of Metals sector employees in each gender is not equal.
Proportion of Others sector employees in each gender is not equal.
Proportion of accident levels in each gender is not equal and males have a higher accident levels than females.
For both accident levels, the incidence of Employee is higher at low accident levels, but the incidence of Third parties seems to be slightly higher at high accident levels.
Both of the two accident level have the tendency that non-severe levels decreased throughout the year, but severe levels did not changed much, and some of these levels increased slightly in the second half of the year.
data[:2]
| Date | Country | Local | Industry Sector | Accident Level | Potential Accident Level | Gender | Employee Type | Critical Risk | Description | Day | Month | Year | Season | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2016-01-01 | Country_01 | Local_01 | Mining | I | IV | Male | Third Party | Pressed | While removing the drill rod of the Jumbo 08 f... | Friday | 1 | 2016 | Summer |
| 1 | 2016-01-02 | Country_02 | Local_02 | Mining | I | IV | Male | Employee | Pressurized Systems | During the activation of a sodium sulphide pum... | Saturday | 1 | 2016 | Summer |
The columns 'Accident Level', 'Potential Accident Level', 'Day' and 'Season' are ordinal type, We are using Label Encoding from sklearn Preprocessing to convert them into numberical values
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder()
data['Accident Level'] = encoder.fit_transform(data['Accident Level'])
data['Potential Accident Level'] = encoder.fit_transform(data['Potential Accident Level'])
data['Day'] = encoder.fit_transform(data['Day'])
data['Season'] = encoder.fit_transform(data['Season'])
data.to_csv('Data.csv')
For the columns 'Country','Local','Industry Sector','Gender','Employee Type','Critical Risk' we are using get_dummies for pandas to convert them into categorical.
dummy_list = ['Country','Local','Industry Sector','Gender','Employee Type','Critical Risk']
data = pd.get_dummies(data,columns=dummy_list,drop_first=True)
data[:2]
| Date | Accident Level | Potential Accident Level | Description | Day | Month | Year | Season | Country_Country_02 | Country_Country_03 | ... | Critical Risk_Projection | Critical Risk_Projection of fragments | Critical Risk_Projection/Burning | Critical Risk_Projection/Choco | Critical Risk_Projection/Manual Tools | Critical Risk_Suspended Loads | Critical Risk_Traffic | Critical Risk_Vehicles and Mobile Equipment | Critical Risk_Venomous Animals | Critical Risk_remains of choco | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2016-01-01 | 0 | 3 | While removing the drill rod of the Jumbo 08 f... | 0 | 1 | 2016 | 2 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | 2016-01-02 | 0 | 3 | During the activation of a sodium sulphide pum... | 2 | 1 | 2016 | 2 | 1 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
2 rows × 58 columns
NPL Preprocessing for the Description Column
data['Cleaned_Description'] = data['Description'].apply(lambda x : x.lower())
data['Cleaned_Description'] = data['Cleaned_Description'].apply(lambda x : re.sub(r"[^a-zA-Z]", " ",x))
data['Cleaned_Description'] = data['Cleaned_Description'].apply(lambda x : x.strip())
from nltk.corpus import stopwords
def remove_stopwords(data):
return ' '.join([word for word in data.split() if word not in stopwords.words('english')])
data['Cleaned_Description'] = data['Cleaned_Description'].apply(lambda x : remove_stopwords(x))
clean_data = data.drop(['Description','Date'],axis=1)
clean_data.to_csv('Cleaned Data.csv')
data
| Date | Accident Level | Potential Accident Level | Description | Day | Month | Year | Season | Country_Country_02 | Country_Country_03 | ... | Critical Risk_Projection of fragments | Critical Risk_Projection/Burning | Critical Risk_Projection/Choco | Critical Risk_Projection/Manual Tools | Critical Risk_Suspended Loads | Critical Risk_Traffic | Critical Risk_Vehicles and Mobile Equipment | Critical Risk_Venomous Animals | Critical Risk_remains of choco | Cleaned_Description | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2016-01-01 | 0 | 3 | While removing the drill rod of the Jumbo 08 f... | 0 | 1 | 2016 | 2 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | removing drill rod jumbo maintenance superviso... |
| 1 | 2016-01-02 | 0 | 3 | During the activation of a sodium sulphide pum... | 2 | 1 | 2016 | 2 | 1 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | activation sodium sulphide pump piping uncoupl... |
| 2 | 2016-01-06 | 0 | 2 | In the sub-station MILPO located at level +170... | 6 | 1 | 2016 | 2 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | sub station milpo located level collaborator e... |
| 3 | 2016-01-08 | 0 | 0 | Being 9:45 am. approximately in the Nv. 1880 C... | 0 | 1 | 2016 | 2 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | approximately nv cx ob personnel begins task u... |
| 4 | 2016-01-10 | 3 | 3 | Approximately at 11:45 a.m. in circumstances t... | 3 | 1 | 2016 | 2 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | approximately circumstances mechanics anthony ... |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 413 | 2017-07-04 | 0 | 2 | Being approximately 5:00 a.m. approximately, w... | 5 | 7 | 2017 | 3 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | approximately approximately lifting kelly hq t... |
| 414 | 2017-07-04 | 0 | 1 | The collaborator moved from the infrastructure... | 5 | 7 | 2017 | 3 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | collaborator moved infrastructure office julio... |
| 415 | 2017-07-05 | 0 | 1 | During the environmental monitoring activity i... | 6 | 7 | 2017 | 3 | 1 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | environmental monitoring activity area employe... |
| 416 | 2017-07-06 | 0 | 1 | The Employee performed the activity of strippi... | 4 | 7 | 2017 | 3 | 1 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | employee performed activity stripping cathodes... |
| 417 | 2017-07-09 | 0 | 1 | At 10:00 a.m., when the assistant cleaned the ... | 3 | 7 | 2017 | 3 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | assistant cleaned floor module e central camp ... |
418 rows × 59 columns
wordcloud = WordCloud(width = 1500, height = 800, random_state=0, background_color='black', colormap='rainbow', \
min_font_size=5, max_words=300, collocations=False).generate(" ".join(clean_data['Cleaned_Description'].values))
plt.figure(figsize=(15,10))
plt.imshow(wordcloud)
plt.axis('off')
plt.show()
# Word Embedding for Glove
embeddings = {}
with open("glove.6B.200d.txt", 'r', encoding="utf-8") as f:
for line in f:
values = line.split()
word = values[0]
vector = np.asarray(values[1:], "float32")
embeddings[word] = vector
def sen2vec(sentence):
words = [word for word in sentence if word.isalpha()]
m = []
for w in words:
try:
m.append(embeddings[w])
except:
continue
m=np.array(m)
v=m.sum(axis=0)
if type(v) != np.ndarray:
return np.zeros(300)
return v/np.sqrt((v**2).sum())
data_glove = [sen2vec(x) for x in clean_data['Cleaned_Description']]
clean_data = pd.concat([
clean_data.drop('Cleaned_Description',axis=1),
pd.DataFrame(data_glove)
],axis=1)
clean_data.to_csv('Clean Data.csv',index = False)
model_data = pd.read_csv('Clean data.csv')
model_data
| Accident Level | Potential Accident Level | Day | Month | Year | Season | Country_Country_02 | Country_Country_03 | Local_Local_02 | Local_Local_03 | ... | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 3 | 0 | 1 | 2016 | 2 | 0 | 0 | 0 | 0 | ... | -0.127637 | -0.000666 | 0.032031 | 0.079318 | 0.040632 | 0.057523 | 0.021442 | 0.028593 | 0.078144 | 0.113700 |
| 1 | 0 | 3 | 2 | 1 | 2016 | 2 | 1 | 0 | 1 | 0 | ... | -0.125652 | -0.009883 | 0.030270 | 0.076656 | 0.036077 | 0.059379 | 0.021374 | 0.025844 | 0.078358 | 0.113305 |
| 2 | 0 | 2 | 6 | 1 | 2016 | 2 | 0 | 0 | 0 | 1 | ... | -0.139361 | 0.001010 | 0.042611 | 0.057901 | 0.038924 | 0.059659 | 0.020549 | 0.035147 | 0.085468 | 0.110301 |
| 3 | 0 | 0 | 0 | 1 | 2016 | 2 | 0 | 0 | 0 | 0 | ... | -0.135374 | -0.001651 | 0.036892 | 0.072390 | 0.037780 | 0.058873 | 0.026217 | 0.030182 | 0.084230 | 0.109283 |
| 4 | 3 | 3 | 3 | 1 | 2016 | 2 | 0 | 0 | 0 | 0 | ... | -0.131904 | -0.009413 | 0.034752 | 0.076336 | 0.041249 | 0.061630 | 0.024414 | 0.030427 | 0.077914 | 0.113101 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 413 | 0 | 2 | 5 | 7 | 2017 | 3 | 0 | 0 | 0 | 0 | ... | -0.136517 | -0.018269 | 0.045214 | 0.062673 | 0.040378 | 0.048878 | 0.019323 | 0.037798 | 0.064775 | 0.115871 |
| 414 | 0 | 1 | 5 | 7 | 2017 | 3 | 0 | 0 | 0 | 1 | ... | -0.137384 | -0.013098 | 0.028761 | 0.069102 | 0.039686 | 0.060512 | 0.022350 | 0.030974 | 0.085180 | 0.119045 |
| 415 | 0 | 1 | 6 | 7 | 2017 | 3 | 1 | 0 | 0 | 0 | ... | -0.128977 | -0.013113 | 0.029272 | 0.079438 | 0.034494 | 0.051892 | 0.024344 | 0.026631 | 0.071254 | 0.115127 |
| 416 | 0 | 1 | 4 | 7 | 2017 | 3 | 1 | 0 | 0 | 0 | ... | -0.130375 | -0.014742 | 0.034502 | 0.083705 | 0.038112 | 0.066569 | 0.023667 | 0.028664 | 0.089785 | 0.105660 |
| 417 | 0 | 1 | 3 | 7 | 2017 | 3 | 0 | 0 | 0 | 0 | ... | -0.144036 | -0.016947 | 0.030251 | 0.081446 | 0.033104 | 0.062566 | 0.019634 | 0.019709 | 0.074901 | 0.115240 |
418 rows × 256 columns
#creating independent Variable X and target Variable
X = model_data.drop(['Accident Level','Potential Accident Level'],axis=1)
y = model_data['Accident Level']
#spliting dataset into 80% train and 20 % test
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size = 0.20, random_state = 12)
#displaying the shape of training and testing data
print("X Training as: ",Xtrain.shape)
print("X Testing as:",Xtest.shape)
print("y Training as: ",ytrain.shape)
print("y Testing as: ",ytest.shape)
X Training as: (334, 254) X Testing as: (84, 254) y Training as: (334,) y Testing as: (84,)
#showing the Imbalance in the data
ytrain.value_counts()
0 251 1 26 2 25 3 24 4 8 Name: Accident Level, dtype: int64
We found the imbalance in the training data target variable, so very creating two more train data by upsampling and SMOTE Method
#using smote function to balance the target columns
#instance for the SMOTE Function
smote = SMOTE()
#fit and transforming the target value
Xtrain_smote,ytrain_smote = smote.fit_resample(Xtrain,ytrain)
#printing the balanced target values
ytrain_smote.value_counts()
0 251 2 251 3 251 1 251 4 251 Name: Accident Level, dtype: int64
#scaling the data
#instance for the standard scaler
scaler = StandardScaler()
#fit and transforming the independent varibles
Xtrain_scale = scaler.fit_transform(Xtrain_smote)
Xtest_scale = scaler.fit_transform(Xtest)
#dimension reduction
#instance for the PCA with columns to keep of 84
pca = PCA(n_components=84)
#fit and transform the scaled independent varibles in to pca instacne
Xtrain_pca = pca.fit_transform(Xtrain_scale)
Xtest_pca = pca.fit_transform(Xtest_scale)
Creating the list of classification model to get the best Preformaing model
#creating the base model classifiers for the training
#creating the list
classifiers = []
#adding the Classifiers model into the created list with the name and model instance
classifiers.append(['Logistic Regression',LogisticRegression(multi_class='multinomial')])
classifiers.append(['Ridge Classifier',RidgeClassifier()])
classifiers.append(['K-Neighbors Classifier',KNeighborsClassifier()])
classifiers.append(['SVC',SVC()])
classifiers.append(['Decision Tree Classifier',DecisionTreeClassifier()])
classifiers.append(['Random Forest Classifier',RandomForestClassifier()])
classifiers.append(['Ada-Boost Classifier',AdaBoostClassifier()])
classifiers.append(['Gradient Boosting Classifier',GradientBoostingClassifier(n_estimators=50, learning_rate=0.1)])
classifiers.append(['Bagging Classifier',BaggingClassifier()])
classifiers.append(['Extra Tree Classifier',ExtraTreesClassifier()])
classifiers.append(['Cat Boost Classifier',CatBoostClassifier(loss_function="MultiClass", random_state=1, verbose=0)])
classifiers.append(['XGBoost Classifier',XGBClassifier(max_depth = 5, objective="multi:softmax", learning_rate = 0.1)])
#crating the train evalute function to train the models with inputs as the list of classifier and traing and testing data
def train_evaluate(classifiers,Xtrain,Xtest,ytrain,ytest):
#creating empty list to store the evaluated data
values = []
report = []
matrix = []
#ccreating a fro loop throught the model build list
for m in classifiers:
scores = []
model = m[1] #geting model instance from the list
print("\033[1m\nTraining: {} Classifier\033[0m".format(m[0]))
model.fit(Xtrain,ytrain) #training the model with the traing data
ypred = model.predict(Xtest) # making prediction
scores.append(m[0])
scores.append(round(model.score(Xtrain,ytrain)*100,4))
print("\t\033[1mTraining Score: \033[0m {}%".format(round(model.score(Xtrain,ytrain)*100,4)))
scores.append(round(model.score(Xtest,ytest)*100,4))
print("\t\033[1mTesting Score: \033[0m{}%".format(round(model.score(Xtest,ytest)*100,4)))
scores.append(round(accuracy_score(ypred,ytest)*100,4))
scores.append(round(precision_score(ypred,ytest,average='weighted')*100,4))
print("\t\033[1mPrecision Score: \033[0m {}%".format(round(precision_score(ypred,ytest,average='weighted')*100,4)))
scores.append(round(recall_score(ypred,ytest,average='weighted')*100,4))
print("\t\033[1mRecall Score: \033[0m {}%".format(round(recall_score(ypred,ytest,average='weighted')*100,4)))
scores.append(round(f1_score(ypred,ytest,average='weighted')*100,4))
print("\t\033[1mF1 Score: \033[0m {}%".format(round(f1_score(ypred,ytest,average='weighted')*100,4)))
print("\n")
values.append(scores)
report.append(m[0])
report.append(classification_report(ypred,ytest))
print("\033[1mClassification Report: \033[0m")
print(classification_report(ypred,ytest))
matrix.append(m[0])
matrix.append(confusion_matrix(ypred,ytest))
print("\n")
print("\033[1mConfusion Matrix: \033[0m")
print(confusion_matrix(ypred,ytest))
dataframe = pd.DataFrame(values,columns=['Model','Training Score','Testing Score','Accuracy Score',
'Precision Score','Recall Score','F1-Score'])
return dataframe,report,matrix
eval_data,report,matrix = train_evaluate(classifiers,Xtrain_pca,Xtest_pca,ytrain_smote,ytest)
Training: Logistic Regression Classifier Training Score: 89.1633% Testing Score: 19.0476% Precision Score: 19.1121% Recall Score: 19.0476% F1 Score: 15.5432% Classification Report: precision recall f1-score support 0 0.16 0.56 0.24 16 1 0.29 0.17 0.22 23 2 0.17 0.06 0.09 16 3 0.33 0.15 0.21 13 4 0.00 0.00 0.00 16 accuracy 0.19 84 macro avg 0.19 0.19 0.15 84 weighted avg 0.19 0.19 0.16 84 Confusion Matrix: [[ 9 3 2 2 0] [18 4 1 0 0] [13 2 1 0 0] [ 9 2 0 2 0] [ 9 3 2 2 0]] Training: Ridge Classifier Classifier Training Score: 83.3466% Testing Score: 20.2381% Precision Score: 17.2365% Recall Score: 20.2381% F1 Score: 14.5792% Classification Report: precision recall f1-score support 0 0.21 0.63 0.31 19 1 0.14 0.17 0.15 12 2 0.33 0.11 0.17 18 3 0.17 0.06 0.09 17 4 0.00 0.00 0.00 18 accuracy 0.20 84 macro avg 0.17 0.19 0.14 84 weighted avg 0.17 0.20 0.15 84 Confusion Matrix: [[12 3 2 2 0] [ 9 2 0 1 0] [13 3 2 0 0] [13 3 0 1 0] [11 3 2 2 0]] Training: K-Neighbors Classifier Classifier Training Score: 86.9323% Testing Score: 7.1429% Precision Score: 16.7439% Recall Score: 7.1429% F1 Score: 7.9756% Classification Report: precision recall f1-score support 0 0.02 1.00 0.03 1 1 0.14 0.17 0.15 12 2 0.33 0.09 0.14 23 3 0.17 0.04 0.06 28 4 0.00 0.00 0.00 20 accuracy 0.07 84 macro avg 0.13 0.26 0.08 84 weighted avg 0.17 0.07 0.08 84 Confusion Matrix: [[ 1 0 0 0 0] [ 9 2 0 1 0] [17 3 2 1 0] [22 4 1 1 0] [ 9 5 3 3 0]] Training: SVC Classifier Training Score: 98.9641% Testing Score: 40.4762% Precision Score: 34.7085% Recall Score: 40.4762% F1 Score: 35.7675% Classification Report: precision recall f1-score support 0 0.47 0.69 0.56 39 1 0.21 0.21 0.21 14 2 0.50 0.25 0.33 12 3 0.17 0.08 0.11 12 4 0.00 0.00 0.00 7 accuracy 0.40 84 macro avg 0.27 0.25 0.24 84 weighted avg 0.35 0.40 0.36 84 Confusion Matrix: [[27 8 2 2 0] [10 3 0 1 0] [ 7 1 3 1 0] [10 1 0 1 0] [ 4 1 1 1 0]] Training: Decision Tree Classifier Classifier Training Score: 99.9203% Testing Score: 34.5238% Precision Score: 25.4995% Recall Score: 34.5238% F1 Score: 28.0405% Classification Report: precision recall f1-score support 0 0.43 0.71 0.54 35 1 0.14 0.14 0.14 14 2 0.00 0.00 0.00 10 3 0.33 0.15 0.21 13 4 0.00 0.00 0.00 12 accuracy 0.35 84 macro avg 0.18 0.20 0.18 84 weighted avg 0.25 0.35 0.28 84 Confusion Matrix: [[25 5 2 3 0] [11 2 1 0 0] [ 7 2 0 1 0] [ 7 2 2 2 0] [ 8 3 1 0 0]] Training: Random Forest Classifier Classifier Training Score: 99.9203% Testing Score: 52.381% Precision Score: 52.9146% Recall Score: 52.381% F1 Score: 52.4348% Classification Report: precision recall f1-score support 0 0.72 0.70 0.71 60 1 0.07 0.14 0.10 7 2 0.17 0.33 0.22 3 3 0.00 0.00 0.00 6 4 0.00 0.00 0.00 8 accuracy 0.52 84 macro avg 0.19 0.24 0.21 84 weighted avg 0.53 0.52 0.52 84 Confusion Matrix: [[42 10 3 5 0] [ 4 1 1 1 0] [ 2 0 1 0 0] [ 4 2 0 0 0] [ 6 1 1 0 0]] Training: Ada-Boost Classifier Classifier Training Score: 65.259% Testing Score: 11.9048% Precision Score: 37.1648% Recall Score: 11.9048% F1 Score: 12.1663% Classification Report: precision recall f1-score support 0 0.07 0.50 0.12 8 1 0.07 0.07 0.07 14 2 0.17 0.10 0.12 10 3 0.67 0.10 0.17 42 4 0.00 0.00 0.00 10 accuracy 0.12 84 macro avg 0.19 0.15 0.10 84 weighted avg 0.37 0.12 0.12 84 Confusion Matrix: [[ 4 2 1 1 0] [12 1 0 1 0] [ 7 2 1 0 0] [28 7 3 4 0] [ 7 2 1 0 0]] Training: Gradient Boosting Classifier Classifier Training Score: 99.9203% Testing Score: 22.619% Precision Score: 13.9475% Recall Score: 22.619% F1 Score: 15.4928% Classification Report: precision recall f1-score support 0 0.28 0.67 0.39 24 1 0.14 0.12 0.13 17 2 0.00 0.00 0.00 17 3 0.17 0.06 0.09 16 4 0.00 0.00 0.00 10 accuracy 0.23 84 macro avg 0.12 0.17 0.12 84 weighted avg 0.14 0.23 0.15 84 Confusion Matrix: [[16 5 0 3 0] [12 2 3 0 0] [12 4 0 1 0] [12 2 1 1 0] [ 6 1 2 1 0]] Training: Bagging Classifier Classifier Training Score: 99.8406% Testing Score: 47.619% Precision Score: 42.1358% Recall Score: 47.619% F1 Score: 44.0851% Classification Report: precision recall f1-score support 0 0.60 0.73 0.66 48 1 0.14 0.20 0.17 10 2 0.17 0.17 0.17 6 3 0.33 0.17 0.22 12 4 0.00 0.00 0.00 8 accuracy 0.48 84 macro avg 0.25 0.25 0.24 84 weighted avg 0.42 0.48 0.44 84 Confusion Matrix: [[35 9 2 2 0] [ 5 2 2 1 0] [ 3 1 1 1 0] [ 9 1 0 2 0] [ 6 1 1 0 0]] Training: Extra Tree Classifier Classifier Training Score: 99.9203% Testing Score: 51.1905% Precision Score: 47.6601% Recall Score: 51.1905% F1 Score: 48.2426% Classification Report: precision recall f1-score support 0 0.62 0.72 0.67 50 1 0.14 0.29 0.19 7 2 0.50 0.50 0.50 6 3 0.33 0.13 0.19 15 4 0.00 0.00 0.00 6 accuracy 0.51 84 macro avg 0.32 0.33 0.31 84 weighted avg 0.48 0.51 0.48 84 Confusion Matrix: [[36 8 2 4 0] [ 5 2 0 0 0] [ 3 0 3 0 0] [10 3 0 2 0] [ 4 1 1 0 0]] Training: Cat Boost Classifier Classifier Training Score: 99.9203% Testing Score: 21.4286% Precision Score: 23.4831% Recall Score: 21.4286% F1 Score: 16.9225% Classification Report: precision recall f1-score support 0 0.19 0.73 0.30 15 1 0.14 0.11 0.12 19 2 0.33 0.12 0.17 17 3 0.50 0.18 0.26 17 4 0.00 0.00 0.00 16 accuracy 0.21 84 macro avg 0.23 0.23 0.17 84 weighted avg 0.23 0.21 0.17 84 Confusion Matrix: [[11 3 1 0 0] [13 2 2 2 0] [12 2 2 1 0] [10 4 0 3 0] [12 3 1 0 0]] Training: XGBoost Classifier Classifier Training Score: 99.9203% Testing Score: 36.9048% Precision Score: 28.7347% Recall Score: 36.9048% F1 Score: 30.1229% Classification Report: precision recall f1-score support 0 0.41 0.77 0.54 31 1 0.36 0.25 0.29 20 2 0.17 0.08 0.11 12 3 0.17 0.08 0.11 13 4 0.00 0.00 0.00 8 accuracy 0.37 84 macro avg 0.22 0.24 0.21 84 weighted avg 0.29 0.37 0.30 84 Confusion Matrix: [[24 3 1 3 0] [11 5 3 1 0] [ 9 2 1 0 0] [10 2 0 1 0] [ 4 2 1 1 0]]
eval_data
| Model | Training Score | Testing Score | Accuracy Score | Precision Score | Recall Score | F1-Score | |
|---|---|---|---|---|---|---|---|
| 0 | Logistic Regression | 89.1633 | 19.0476 | 19.0476 | 19.1121 | 19.0476 | 15.5432 |
| 1 | Ridge Classifier | 83.3466 | 20.2381 | 20.2381 | 17.2365 | 20.2381 | 14.5792 |
| 2 | K-Neighbors Classifier | 86.9323 | 7.1429 | 7.1429 | 16.7439 | 7.1429 | 7.9756 |
| 3 | SVC | 98.9641 | 40.4762 | 40.4762 | 34.7085 | 40.4762 | 35.7675 |
| 4 | Decision Tree Classifier | 99.9203 | 34.5238 | 34.5238 | 25.4995 | 34.5238 | 28.0405 |
| 5 | Random Forest Classifier | 99.9203 | 52.3810 | 52.3810 | 52.9146 | 52.3810 | 52.4348 |
| 6 | Ada-Boost Classifier | 65.2590 | 11.9048 | 11.9048 | 37.1648 | 11.9048 | 12.1663 |
| 7 | Gradient Boosting Classifier | 99.9203 | 22.6190 | 22.6190 | 13.9475 | 22.6190 | 15.4928 |
| 8 | Bagging Classifier | 99.8406 | 47.6190 | 47.6190 | 42.1358 | 47.6190 | 44.0851 |
| 9 | Extra Tree Classifier | 99.9203 | 51.1905 | 51.1905 | 47.6601 | 51.1905 | 48.2426 |
| 10 | Cat Boost Classifier | 99.9203 | 21.4286 | 21.4286 | 23.4831 | 21.4286 | 16.9225 |
| 11 | XGBoost Classifier | 99.9203 | 36.9048 | 36.9048 | 28.7347 | 36.9048 | 30.1229 |
def hyper_tuning(name, model, X_train, y_train, param_grid):
start = time.time()
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=cv, error_score=0)
model_grid_result = grid_search.fit(X_train, y_train)
print(name)
print("Best F1_Score: %f using %s" % (model_grid_result.best_score_, model_grid_result.best_params_))
means = model_grid_result.cv_results_['mean_test_score']
stds = model_grid_result.cv_results_['std_test_score']
params = model_grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
if param == model_grid_result.best_params_:
print("%f (%f) with: %r" % (mean, stdev, param))
print("95% Confidence interval range: ({0:.4f} %, {1:.4f} %)".format(mean-(2*stdev), mean+(2*stdev)))
end = time.time() # note the end time
duration = end - start # calculate the total duration
print("Total duration" , duration, "\n")
return model_grid_result.best_estimator_
#creating base model
base_model = [
['Random Forest Classifier',RandomForestClassifier()],
['Extra Trees Classifier',ExtraTreesClassifier()],
]
#Parameter for random forest
rf_param_grid = {
'n_estimators': [10, 100, 1000],
'max_features': ['auto', 'sqrt', 'log2']
}
#Parameter for extra tree classifier
et_param_grid = {
'n_estimators': np.arange(10,100,10),
'max_features': ['auto', 'sqrt', 'log2'],
'min_samples_split': np.arange(2,15,1)
}
##calling hyper tuning function with base modela and with its parameters
for name, classifier in base_model:
if name == 'Random Forest Classifier':
rf_best_estimator = hyper_tuning(name, classifier, Xtrain, ytrain, rf_param_grid)
elif name == 'Extra Trees Classifier':
et_best_estimator = hyper_tuning(name, classifier, Xtrain, ytrain, et_param_grid)
Random Forest Classifier
Best F1_Score: 0.748604 using {'max_features': 'auto', 'n_estimators': 1000}
0.748604 (0.014397) with: {'max_features': 'auto', 'n_estimators': 1000}
95% Confidence interval range: (0.7198 %, 0.7774 %)
Total duration 137.30708599090576
Extra Trees Classifier
Best F1_Score: 0.752614 using {'max_features': 'sqrt', 'min_samples_split': 12, 'n_estimators': 10}
0.752614 (0.012650) with: {'max_features': 'sqrt', 'min_samples_split': 12, 'n_estimators': 10}
95% Confidence interval range: (0.7273 %, 0.7779 %)
Total duration 304.58112716674805
Methology to Predict the Accident Level & Critical Risk
Design, train and test Neural networks classifiers
A. Neural Network with all the Preprocessed Parameters to Predict "Accident Level".
B. Neural Network with all the Preprocessed Parameters to Predict "Critical Risk".
Design, train and test RNN or LSTM classifiers
A. Using Description Parameter to Predict the "Accident Level".
B. Using Description & Accident Level to Predict "Critical Risk".
C. Base LSTM Model to Predict "Accident Level" with Description.
D. Base LSTM Model to Predict "Critical Risk" with Description and "Accident Level".
E. Bi-directional LSTM Model to Predict "Accident Level" with Description.
F. Bi-directional LSTM Model to Predict "Critical Risk" with Description and "Accident Level".
1.A Creating as ANN model to Predict the Accident Level using the Preprocessed Data
#importing the preprocessed data as model data
model_data = pd.read_csv('Clean Data.csv')
#printing the shape and the Head of the imported preprocessed dataset
print("The shape of the dataset {} rows and {} columns".format(model_data.shape[0],model_data.shape[1]))
#preparing Train,Validate and Testing data for the Aritifical Neural Network
X = model_data.drop(['Accident Level',],axis=1)
y = model_data['Accident Level']
The shape of the dataset 418 rows and 256 columns
#function to Spliting data into train, test and vaildation for predictiong the accident level
def ACC_data_split(X,y):
#making train and test Split from the X and y
smote = SMOTE()
Xtrain_smote,ytrain_smote = smote.fit_resample(X,y)
Xtrain,Xtest,ytrain,ytest = train_test_split(Xtrain_smote,ytrain_smote,test_size=0.20,random_state=12)
#making validation split for the train data
Xtrain,Xval,ytrain,yval = train_test_split(Xtrain,ytrain,test_size=0.20,random_state=12)
#displaying the shape of the train, test and validation dataset
print("Independent Variables:\n")
print("The X-Train Data as {}".format(Xtrain.shape))
print("The X-Validation Data as {}".format(Xval.shape))
print("The X-Test Data as {}".format(Xtest.shape))
print("\nTarget Variables:\n")
print("The y-Train Data as {}".format(ytrain.shape))
print("The y-Validation Data as {}".format(yval.shape))
print("The y-Test Data as {}".format(ytest.shape))
return Xtrain,Xval,Xtest,ytrain,yval,ytest
Xtrain,Xval,Xtest,ytrain,yval,ytest = ACC_data_split(X,y)
Independent Variables: The X-Train Data as (988, 255) The X-Validation Data as (248, 255) The X-Test Data as (309, 255) Target Variables: The y-Train Data as (988,) The y-Validation Data as (248,) The y-Test Data as (309,)
#converting the target variable from numeric to categorical
ytrain_label = to_categorical(ytrain)
yval_label = to_categorical(yval)
#Preparining the independent variables for NN
scaler = StandardScaler()
Xtrain_scale = scaler.fit_transform(Xtrain)
Xval_scale = scaler.fit_transform(Xval)
Xtest_scale =scaler.fit_transform(Xtest)
#base model to predict the accident Level
def base_model(Xtrain,ytrain,Xval, yval):
#Buliding Model
model=Sequential() #sequential model
model.add(Dense(128,activation='relu',input_shape=(len(Xtrain[0]),))) #dense layer / input layer with length of the Xtrain rows
model.add(Dropout(0.5)) #dropout layer
model.add(Dense(64,activation='relu')) #hidden layer 1
model.add(Dense(64,activation='relu')) #hidden Layer 2
model.add(Dropout(0.5)) #dropout layer
model.add(Dense(len(ytrain[0]),activation='softmax')) #output layer with softmax activation function
adam=tf.keras.optimizers.Adam(0.001) #defining the adam optimizer function with learing ratw
model.compile(optimizer=adam,loss='categorical_crossentropy',metrics=['accuracy']) #compiling the model
history = model.fit(Xtrain,ytrain, validation_data=(Xval, yval),epochs=100)
return model,history
model,history = base_model(Xtrain_scale, ytrain_label,Xval_scale, yval_label)
Epoch 1/100 31/31 [==============================] - 2s 12ms/step - loss: 1.7847 - accuracy: 0.2358 - val_loss: 1.4026 - val_accuracy: 0.5000 Epoch 2/100 31/31 [==============================] - 0s 4ms/step - loss: 1.4625 - accuracy: 0.3745 - val_loss: 1.2659 - val_accuracy: 0.5444 Epoch 3/100 31/31 [==============================] - 0s 5ms/step - loss: 1.2730 - accuracy: 0.4868 - val_loss: 1.1166 - val_accuracy: 0.6048 Epoch 4/100 31/31 [==============================] - 0s 5ms/step - loss: 1.0971 - accuracy: 0.5779 - val_loss: 0.9662 - val_accuracy: 0.6694 Epoch 5/100 31/31 [==============================] - 0s 4ms/step - loss: 0.9781 - accuracy: 0.6113 - val_loss: 0.8464 - val_accuracy: 0.7298 Epoch 6/100 31/31 [==============================] - 0s 4ms/step - loss: 0.8609 - accuracy: 0.6852 - val_loss: 0.7342 - val_accuracy: 0.7581 Epoch 7/100 31/31 [==============================] - 0s 4ms/step - loss: 0.7501 - accuracy: 0.7196 - val_loss: 0.7066 - val_accuracy: 0.7863 Epoch 8/100 31/31 [==============================] - 0s 3ms/step - loss: 0.6748 - accuracy: 0.7470 - val_loss: 0.6057 - val_accuracy: 0.8226 Epoch 9/100 31/31 [==============================] - 0s 3ms/step - loss: 0.6606 - accuracy: 0.7652 - val_loss: 0.5471 - val_accuracy: 0.8387 Epoch 10/100 31/31 [==============================] - 0s 3ms/step - loss: 0.5493 - accuracy: 0.7966 - val_loss: 0.5272 - val_accuracy: 0.8347 Epoch 11/100 31/31 [==============================] - 0s 3ms/step - loss: 0.4770 - accuracy: 0.8289 - val_loss: 0.5000 - val_accuracy: 0.8468 Epoch 12/100 31/31 [==============================] - 0s 3ms/step - loss: 0.5013 - accuracy: 0.8249 - val_loss: 0.4304 - val_accuracy: 0.8831 Epoch 13/100 31/31 [==============================] - 0s 3ms/step - loss: 0.4465 - accuracy: 0.8482 - val_loss: 0.4301 - val_accuracy: 0.8790 Epoch 14/100 31/31 [==============================] - 0s 3ms/step - loss: 0.3853 - accuracy: 0.8623 - val_loss: 0.4239 - val_accuracy: 0.8871 Epoch 15/100 31/31 [==============================] - 0s 3ms/step - loss: 0.4069 - accuracy: 0.8482 - val_loss: 0.4474 - val_accuracy: 0.8589 Epoch 16/100 31/31 [==============================] - 0s 3ms/step - loss: 0.3803 - accuracy: 0.8715 - val_loss: 0.3785 - val_accuracy: 0.8750 Epoch 17/100 31/31 [==============================] - 0s 3ms/step - loss: 0.3230 - accuracy: 0.8988 - val_loss: 0.3648 - val_accuracy: 0.9032 Epoch 18/100 31/31 [==============================] - 0s 3ms/step - loss: 0.3236 - accuracy: 0.8897 - val_loss: 0.3682 - val_accuracy: 0.8790 Epoch 19/100 31/31 [==============================] - 0s 3ms/step - loss: 0.3180 - accuracy: 0.8836 - val_loss: 0.3433 - val_accuracy: 0.8911 Epoch 20/100 31/31 [==============================] - 0s 3ms/step - loss: 0.2886 - accuracy: 0.9079 - val_loss: 0.3487 - val_accuracy: 0.8790 Epoch 21/100 31/31 [==============================] - 0s 3ms/step - loss: 0.3076 - accuracy: 0.9018 - val_loss: 0.3390 - val_accuracy: 0.9032 Epoch 22/100 31/31 [==============================] - 0s 3ms/step - loss: 0.2793 - accuracy: 0.9018 - val_loss: 0.3231 - val_accuracy: 0.8992 Epoch 23/100 31/31 [==============================] - 0s 3ms/step - loss: 0.2596 - accuracy: 0.9140 - val_loss: 0.3613 - val_accuracy: 0.8790 Epoch 24/100 31/31 [==============================] - 0s 3ms/step - loss: 0.2537 - accuracy: 0.9130 - val_loss: 0.3112 - val_accuracy: 0.9032 Epoch 25/100 31/31 [==============================] - 0s 3ms/step - loss: 0.2571 - accuracy: 0.9180 - val_loss: 0.3539 - val_accuracy: 0.8911 Epoch 26/100 31/31 [==============================] - 0s 3ms/step - loss: 0.2234 - accuracy: 0.9241 - val_loss: 0.3420 - val_accuracy: 0.9073 Epoch 27/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1994 - accuracy: 0.9332 - val_loss: 0.3873 - val_accuracy: 0.8790 Epoch 28/100 31/31 [==============================] - 0s 4ms/step - loss: 0.1994 - accuracy: 0.9322 - val_loss: 0.3460 - val_accuracy: 0.9153 Epoch 29/100 31/31 [==============================] - 0s 4ms/step - loss: 0.2043 - accuracy: 0.9383 - val_loss: 0.3529 - val_accuracy: 0.9113 Epoch 30/100 31/31 [==============================] - 0s 4ms/step - loss: 0.2324 - accuracy: 0.9160 - val_loss: 0.3733 - val_accuracy: 0.8992 Epoch 31/100 31/31 [==============================] - 0s 4ms/step - loss: 0.2002 - accuracy: 0.9281 - val_loss: 0.3567 - val_accuracy: 0.9032 Epoch 32/100 31/31 [==============================] - 0s 4ms/step - loss: 0.1951 - accuracy: 0.9352 - val_loss: 0.3816 - val_accuracy: 0.8952 Epoch 33/100 31/31 [==============================] - 0s 4ms/step - loss: 0.1972 - accuracy: 0.9302 - val_loss: 0.3350 - val_accuracy: 0.9073 Epoch 34/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1718 - accuracy: 0.9352 - val_loss: 0.3315 - val_accuracy: 0.9032 Epoch 35/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1862 - accuracy: 0.9291 - val_loss: 0.3576 - val_accuracy: 0.9073 Epoch 36/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1700 - accuracy: 0.9443 - val_loss: 0.3241 - val_accuracy: 0.9194 Epoch 37/100 31/31 [==============================] - 0s 2ms/step - loss: 0.1698 - accuracy: 0.9372 - val_loss: 0.2943 - val_accuracy: 0.9315 Epoch 38/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1792 - accuracy: 0.9342 - val_loss: 0.3121 - val_accuracy: 0.9113 Epoch 39/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1466 - accuracy: 0.9484 - val_loss: 0.3379 - val_accuracy: 0.9194 Epoch 40/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1681 - accuracy: 0.9453 - val_loss: 0.3323 - val_accuracy: 0.9153 Epoch 41/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1483 - accuracy: 0.9524 - val_loss: 0.3719 - val_accuracy: 0.9153 Epoch 42/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1283 - accuracy: 0.9524 - val_loss: 0.3283 - val_accuracy: 0.9194 Epoch 43/100 31/31 [==============================] - 0s 2ms/step - loss: 0.1140 - accuracy: 0.9626 - val_loss: 0.3347 - val_accuracy: 0.9234 Epoch 44/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1246 - accuracy: 0.9656 - val_loss: 0.3722 - val_accuracy: 0.9153 Epoch 45/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1483 - accuracy: 0.9504 - val_loss: 0.4147 - val_accuracy: 0.9032 Epoch 46/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1545 - accuracy: 0.9393 - val_loss: 0.3530 - val_accuracy: 0.9113 Epoch 47/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1279 - accuracy: 0.9514 - val_loss: 0.3734 - val_accuracy: 0.9153 Epoch 48/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1378 - accuracy: 0.9413 - val_loss: 0.3398 - val_accuracy: 0.9234 Epoch 49/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1191 - accuracy: 0.9605 - val_loss: 0.4168 - val_accuracy: 0.8911 Epoch 50/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1072 - accuracy: 0.9605 - val_loss: 0.3660 - val_accuracy: 0.9113 Epoch 51/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1151 - accuracy: 0.9615 - val_loss: 0.3474 - val_accuracy: 0.9073 Epoch 52/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1005 - accuracy: 0.9676 - val_loss: 0.3377 - val_accuracy: 0.9194 Epoch 53/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1106 - accuracy: 0.9636 - val_loss: 0.3472 - val_accuracy: 0.9153 Epoch 54/100 31/31 [==============================] - 0s 2ms/step - loss: 0.1281 - accuracy: 0.9585 - val_loss: 0.3152 - val_accuracy: 0.9113 Epoch 55/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1122 - accuracy: 0.9656 - val_loss: 0.3342 - val_accuracy: 0.9234 Epoch 56/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1215 - accuracy: 0.9615 - val_loss: 0.3265 - val_accuracy: 0.9194 Epoch 57/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1136 - accuracy: 0.9585 - val_loss: 0.3666 - val_accuracy: 0.9153 Epoch 58/100 31/31 [==============================] - 0s 4ms/step - loss: 0.1078 - accuracy: 0.9636 - val_loss: 0.3789 - val_accuracy: 0.9073 Epoch 59/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1241 - accuracy: 0.9585 - val_loss: 0.3870 - val_accuracy: 0.9113 Epoch 60/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1083 - accuracy: 0.9636 - val_loss: 0.3789 - val_accuracy: 0.9113 Epoch 61/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1119 - accuracy: 0.9636 - val_loss: 0.3539 - val_accuracy: 0.9194 Epoch 62/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1131 - accuracy: 0.9626 - val_loss: 0.3328 - val_accuracy: 0.9194 Epoch 63/100 31/31 [==============================] - 0s 3ms/step - loss: 0.0975 - accuracy: 0.9646 - val_loss: 0.3033 - val_accuracy: 0.9234 Epoch 64/100 31/31 [==============================] - 0s 3ms/step - loss: 0.0972 - accuracy: 0.9737 - val_loss: 0.2692 - val_accuracy: 0.9476 Epoch 65/100 31/31 [==============================] - 0s 3ms/step - loss: 0.0931 - accuracy: 0.9646 - val_loss: 0.3557 - val_accuracy: 0.9194 Epoch 66/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1063 - accuracy: 0.9696 - val_loss: 0.3972 - val_accuracy: 0.9032 Epoch 67/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1085 - accuracy: 0.9656 - val_loss: 0.3347 - val_accuracy: 0.9194 Epoch 68/100 31/31 [==============================] - 0s 3ms/step - loss: 0.0905 - accuracy: 0.9737 - val_loss: 0.3481 - val_accuracy: 0.9194 Epoch 69/100 31/31 [==============================] - 0s 3ms/step - loss: 0.0682 - accuracy: 0.9787 - val_loss: 0.3877 - val_accuracy: 0.9153 Epoch 70/100 31/31 [==============================] - 0s 3ms/step - loss: 0.0964 - accuracy: 0.9626 - val_loss: 0.3537 - val_accuracy: 0.9234 Epoch 71/100 31/31 [==============================] - 0s 4ms/step - loss: 0.0947 - accuracy: 0.9696 - val_loss: 0.3654 - val_accuracy: 0.9234 Epoch 72/100 31/31 [==============================] - 0s 3ms/step - loss: 0.0754 - accuracy: 0.9717 - val_loss: 0.3361 - val_accuracy: 0.9355 Epoch 73/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1017 - accuracy: 0.9656 - val_loss: 0.3643 - val_accuracy: 0.9153 Epoch 74/100 31/31 [==============================] - 0s 3ms/step - loss: 0.0746 - accuracy: 0.9767 - val_loss: 0.3711 - val_accuracy: 0.9274 Epoch 75/100 31/31 [==============================] - 0s 3ms/step - loss: 0.0859 - accuracy: 0.9686 - val_loss: 0.4052 - val_accuracy: 0.8992 Epoch 76/100 31/31 [==============================] - 0s 3ms/step - loss: 0.0597 - accuracy: 0.9808 - val_loss: 0.3819 - val_accuracy: 0.9234 Epoch 77/100 31/31 [==============================] - 0s 3ms/step - loss: 0.0587 - accuracy: 0.9848 - val_loss: 0.4208 - val_accuracy: 0.9073 Epoch 78/100 31/31 [==============================] - 0s 3ms/step - loss: 0.0859 - accuracy: 0.9747 - val_loss: 0.3475 - val_accuracy: 0.9395 Epoch 79/100 31/31 [==============================] - 0s 3ms/step - loss: 0.0607 - accuracy: 0.9737 - val_loss: 0.3437 - val_accuracy: 0.9355 Epoch 80/100 31/31 [==============================] - 0s 3ms/step - loss: 0.0672 - accuracy: 0.9787 - val_loss: 0.4236 - val_accuracy: 0.9194 Epoch 81/100 31/31 [==============================] - 0s 3ms/step - loss: 0.0677 - accuracy: 0.9848 - val_loss: 0.3963 - val_accuracy: 0.9274 Epoch 82/100 31/31 [==============================] - 0s 3ms/step - loss: 0.0622 - accuracy: 0.9737 - val_loss: 0.4042 - val_accuracy: 0.9113 Epoch 83/100 31/31 [==============================] - 0s 3ms/step - loss: 0.0729 - accuracy: 0.9767 - val_loss: 0.3693 - val_accuracy: 0.9274 Epoch 84/100 31/31 [==============================] - 0s 3ms/step - loss: 0.0702 - accuracy: 0.9737 - val_loss: 0.3441 - val_accuracy: 0.9355 Epoch 85/100 31/31 [==============================] - 0s 4ms/step - loss: 0.0806 - accuracy: 0.9737 - val_loss: 0.4043 - val_accuracy: 0.9274 Epoch 86/100 31/31 [==============================] - 0s 3ms/step - loss: 0.0688 - accuracy: 0.9828 - val_loss: 0.3679 - val_accuracy: 0.9435 Epoch 87/100 31/31 [==============================] - 0s 2ms/step - loss: 0.0643 - accuracy: 0.9798 - val_loss: 0.3768 - val_accuracy: 0.9315 Epoch 88/100 31/31 [==============================] - 0s 2ms/step - loss: 0.0861 - accuracy: 0.9696 - val_loss: 0.4251 - val_accuracy: 0.9315 Epoch 89/100 31/31 [==============================] - 0s 2ms/step - loss: 0.0589 - accuracy: 0.9798 - val_loss: 0.4353 - val_accuracy: 0.9315 Epoch 90/100 31/31 [==============================] - 0s 3ms/step - loss: 0.0587 - accuracy: 0.9828 - val_loss: 0.4142 - val_accuracy: 0.9395 Epoch 91/100 31/31 [==============================] - 0s 4ms/step - loss: 0.0819 - accuracy: 0.9747 - val_loss: 0.4158 - val_accuracy: 0.9315 Epoch 92/100 31/31 [==============================] - 0s 3ms/step - loss: 0.0498 - accuracy: 0.9828 - val_loss: 0.4324 - val_accuracy: 0.9194 Epoch 93/100 31/31 [==============================] - 0s 3ms/step - loss: 0.0573 - accuracy: 0.9818 - val_loss: 0.4172 - val_accuracy: 0.9274 Epoch 94/100 31/31 [==============================] - 0s 4ms/step - loss: 0.1009 - accuracy: 0.9696 - val_loss: 0.4297 - val_accuracy: 0.9153 Epoch 95/100 31/31 [==============================] - 0s 3ms/step - loss: 0.0708 - accuracy: 0.9777 - val_loss: 0.3887 - val_accuracy: 0.9315 Epoch 96/100 31/31 [==============================] - 0s 3ms/step - loss: 0.0885 - accuracy: 0.9686 - val_loss: 0.3892 - val_accuracy: 0.9315 Epoch 97/100 31/31 [==============================] - 0s 3ms/step - loss: 0.0684 - accuracy: 0.9777 - val_loss: 0.4211 - val_accuracy: 0.9315 Epoch 98/100 31/31 [==============================] - 0s 3ms/step - loss: 0.0677 - accuracy: 0.9747 - val_loss: 0.3909 - val_accuracy: 0.9315 Epoch 99/100 31/31 [==============================] - 0s 3ms/step - loss: 0.0727 - accuracy: 0.9757 - val_loss: 0.3932 - val_accuracy: 0.9315 Epoch 100/100 31/31 [==============================] - 0s 3ms/step - loss: 0.0442 - accuracy: 0.9808 - val_loss: 0.3687 - val_accuracy: 0.9274
#ploting the Accuary and Loss on the model performance for
def plot(history):
x = np.arange(0,len(history.history['accuracy']))
accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']
fig = px.line(x=x, y=[accuracy,val_accuracy],width=800, height=400,labels={"x":'Epochs'}, title="Training and Validation Accuracy")
fig.show()
x = np.arange(0,len(history.history['loss']))
loss = history.history['loss']
val_loss = history.history['val_loss']
fig = px.line(x=x, y=[loss,val_loss],width=800, height=400,labels={"x":'Epochs'},title="Training and Validation Loss")
fig.show()
plot(history)
#creating Function to evaluate the model performace with the training data
def ACC_model_eval(model,Xtest,ytest):
ypred = model.predict(Xtest)
ypred = [np.argmax(i) for i in ypred]
print('\nModel Evaluation:\n')
print('Accuracy Score: {} %'.format(round(accuracy_score(ypred,ytest)*100,2)))
print('Precision Score: {} %'.format(round(precision_score(ypred,ytest,average='weighted')*100,2)))
print('Recall Score: {} %'.format(round(recall_score(ypred,ytest,average='weighted')*100,2)))
print('F1 Score: {} %'.format(round(f1_score(ypred,ytest,average='weighted')*100,2)))
print("\nClassification Report:\n",classification_report(ypred,ytest))
cm = confusion_matrix(ypred,ytest)
print("\nConfusion Matrix:")
sns.heatmap(cm,annot=True,
yticklabels = ['I','II','III','IV','V'],
xticklabels = ['I','II','III','IV','V'],
cmap='Blues')
plt.show()
ACC_model_eval(model,Xtest_scale,ytest)
10/10 [==============================] - 0s 2ms/step
Model Evaluation:
Accuracy Score: 93.2 %
Precision Score: 93.23 %
Recall Score: 93.2 %
F1 Score: 93.19 %
Classification Report:
precision recall f1-score support
0 0.86 0.83 0.84 59
1 0.87 0.88 0.88 52
2 0.98 0.95 0.97 62
3 0.94 0.99 0.96 73
4 1.00 0.98 0.99 63
accuracy 0.93 309
macro avg 0.93 0.93 0.93 309
weighted avg 0.93 0.93 0.93 309
Confusion Matrix:
1.B Creating as ANN model to Predict the Critical Risk using the Preprocessed Data
#importing Data to access the Critical risk target variable
data = pd.read_csv('Data.csv')
#preparing the X variable by indexing and removing all the Critical risk columns
def get_columns_with_prefix(df, prefix='Critical Risk'):
columns = df.columns
matching_indexes = []
for i, column in enumerate(columns):
if column.startswith(prefix):
matching_indexes.append(i)
return matching_indexes
indexes = get_columns_with_prefix(model_data)
X = model_data.drop(model_data.columns[indexes],axis=1)
critical_risk = data['Critical Risk']
cri_encoder = LabelEncoder()
y = cri_encoder.fit_transform(critical_risk)
def CRI_data_split(X,y):
from imblearn.over_sampling import RandomOverSampler
Xtrain_upsam,ytrain_upsam = RandomOverSampler().fit_resample(X,y)
#making train and test Split from the X and y
Xtrain,Xtest,ytrain,ytest = train_test_split(Xtrain_upsam,ytrain_upsam,test_size=0.20,random_state=12)
#making validation split for the train data
Xtrain,Xval,ytrain,yval = train_test_split(Xtrain,ytrain,test_size=0.20,random_state=12)
#displaying the shape of the train, test and validation dataset
print("Independent Variables:\n")
print("The X-Train Data as {}".format(Xtrain.shape))
print("The X-Validation Data as {}".format(Xval.shape))
print("The X-Test Data as {}".format(Xtest.shape))
print("\nTarget Variables:\n")
print("The y-Train Data as {}".format(ytrain.shape))
print("The y-Validation Data as {}".format(yval.shape))
print("The y-Test Data as {}".format(ytest.shape))
return Xtrain,Xval,Xtest,ytrain,yval,ytest
Xtrain,Xval,Xtest,ytrain,yval,ytest = CRI_data_split(X,y)
Independent Variables: The X-Train Data as (4836, 224) The X-Validation Data as (1209, 224) The X-Test Data as (1512, 224) Target Variables: The y-Train Data as (4836,) The y-Validation Data as (1209,) The y-Test Data as (1512,)
ytrain_label = to_categorical(ytrain)
yval_label = to_categorical(yval)
scaler = StandardScaler()
Xtrain_scale = scaler.fit_transform(Xtrain)
Xval_scale = scaler.fit_transform(Xval)
Xtest_scale =scaler.fit_transform(Xtest)
model,history = base_model(Xtrain_scale, ytrain_label,Xval_scale, yval_label)
Epoch 1/100 152/152 [==============================] - 1s 3ms/step - loss: 2.7172 - accuracy: 0.2860 - val_loss: 1.3728 - val_accuracy: 0.7138 Epoch 2/100 152/152 [==============================] - 0s 2ms/step - loss: 1.5599 - accuracy: 0.5767 - val_loss: 0.7090 - val_accuracy: 0.8321 Epoch 3/100 152/152 [==============================] - 0s 2ms/step - loss: 1.1502 - accuracy: 0.6760 - val_loss: 0.4823 - val_accuracy: 0.8710 Epoch 4/100 152/152 [==============================] - 0s 2ms/step - loss: 0.8898 - accuracy: 0.7397 - val_loss: 0.3719 - val_accuracy: 0.8941 Epoch 5/100 152/152 [==============================] - 0s 2ms/step - loss: 0.7593 - accuracy: 0.7773 - val_loss: 0.2922 - val_accuracy: 0.9231 Epoch 6/100 152/152 [==============================] - 0s 2ms/step - loss: 0.6476 - accuracy: 0.8042 - val_loss: 0.2394 - val_accuracy: 0.9289 Epoch 7/100 152/152 [==============================] - 0s 2ms/step - loss: 0.5650 - accuracy: 0.8296 - val_loss: 0.2107 - val_accuracy: 0.9355 Epoch 8/100 152/152 [==============================] - 0s 2ms/step - loss: 0.5364 - accuracy: 0.8358 - val_loss: 0.1890 - val_accuracy: 0.9487 Epoch 9/100 152/152 [==============================] - 0s 2ms/step - loss: 0.4825 - accuracy: 0.8526 - val_loss: 0.1777 - val_accuracy: 0.9438 Epoch 10/100 152/152 [==============================] - 0s 2ms/step - loss: 0.4443 - accuracy: 0.8650 - val_loss: 0.1652 - val_accuracy: 0.9471 Epoch 11/100 152/152 [==============================] - 0s 2ms/step - loss: 0.3988 - accuracy: 0.8755 - val_loss: 0.1479 - val_accuracy: 0.9462 Epoch 12/100 152/152 [==============================] - 0s 2ms/step - loss: 0.3853 - accuracy: 0.8784 - val_loss: 0.1279 - val_accuracy: 0.9620 Epoch 13/100 152/152 [==============================] - 0s 2ms/step - loss: 0.3520 - accuracy: 0.8904 - val_loss: 0.1326 - val_accuracy: 0.9595 Epoch 14/100 152/152 [==============================] - 0s 2ms/step - loss: 0.3292 - accuracy: 0.8974 - val_loss: 0.1225 - val_accuracy: 0.9628 Epoch 15/100 152/152 [==============================] - 0s 2ms/step - loss: 0.3183 - accuracy: 0.8995 - val_loss: 0.1187 - val_accuracy: 0.9620 Epoch 16/100 152/152 [==============================] - 0s 2ms/step - loss: 0.3213 - accuracy: 0.8962 - val_loss: 0.1159 - val_accuracy: 0.9677 Epoch 17/100 152/152 [==============================] - 0s 2ms/step - loss: 0.2994 - accuracy: 0.9055 - val_loss: 0.1162 - val_accuracy: 0.9669 Epoch 18/100 152/152 [==============================] - 0s 2ms/step - loss: 0.3029 - accuracy: 0.9080 - val_loss: 0.1044 - val_accuracy: 0.9562 Epoch 19/100 152/152 [==============================] - 0s 2ms/step - loss: 0.2787 - accuracy: 0.9165 - val_loss: 0.0922 - val_accuracy: 0.9735 Epoch 20/100 152/152 [==============================] - 0s 2ms/step - loss: 0.2530 - accuracy: 0.9198 - val_loss: 0.0975 - val_accuracy: 0.9719 Epoch 21/100 152/152 [==============================] - 0s 2ms/step - loss: 0.2502 - accuracy: 0.9239 - val_loss: 0.0865 - val_accuracy: 0.9785 Epoch 22/100 152/152 [==============================] - 0s 2ms/step - loss: 0.2370 - accuracy: 0.9237 - val_loss: 0.0861 - val_accuracy: 0.9744 Epoch 23/100 152/152 [==============================] - 0s 2ms/step - loss: 0.2386 - accuracy: 0.9222 - val_loss: 0.0771 - val_accuracy: 0.9752 Epoch 24/100 152/152 [==============================] - 0s 2ms/step - loss: 0.2284 - accuracy: 0.9260 - val_loss: 0.0719 - val_accuracy: 0.9859 Epoch 25/100 152/152 [==============================] - 0s 2ms/step - loss: 0.2277 - accuracy: 0.9328 - val_loss: 0.0738 - val_accuracy: 0.9785 Epoch 26/100 152/152 [==============================] - 0s 2ms/step - loss: 0.2181 - accuracy: 0.9342 - val_loss: 0.0686 - val_accuracy: 0.9810 Epoch 27/100 152/152 [==============================] - 0s 2ms/step - loss: 0.2004 - accuracy: 0.9380 - val_loss: 0.0710 - val_accuracy: 0.9777 Epoch 28/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1990 - accuracy: 0.9400 - val_loss: 0.0645 - val_accuracy: 0.9801 Epoch 29/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1929 - accuracy: 0.9444 - val_loss: 0.0633 - val_accuracy: 0.9859 Epoch 30/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1961 - accuracy: 0.9361 - val_loss: 0.0686 - val_accuracy: 0.9810 Epoch 31/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1812 - accuracy: 0.9429 - val_loss: 0.0624 - val_accuracy: 0.9826 Epoch 32/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1697 - accuracy: 0.9477 - val_loss: 0.0564 - val_accuracy: 0.9826 Epoch 33/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1740 - accuracy: 0.9458 - val_loss: 0.0626 - val_accuracy: 0.9818 Epoch 34/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1600 - accuracy: 0.9475 - val_loss: 0.0488 - val_accuracy: 0.9859 Epoch 35/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1707 - accuracy: 0.9460 - val_loss: 0.0465 - val_accuracy: 0.9859 Epoch 36/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1636 - accuracy: 0.9475 - val_loss: 0.0515 - val_accuracy: 0.9868 Epoch 37/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1460 - accuracy: 0.9557 - val_loss: 0.0472 - val_accuracy: 0.9851 Epoch 38/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1625 - accuracy: 0.9483 - val_loss: 0.0466 - val_accuracy: 0.9843 Epoch 39/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1544 - accuracy: 0.9514 - val_loss: 0.0515 - val_accuracy: 0.9884 Epoch 40/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1598 - accuracy: 0.9502 - val_loss: 0.0482 - val_accuracy: 0.9876 Epoch 41/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1506 - accuracy: 0.9549 - val_loss: 0.0455 - val_accuracy: 0.9876 Epoch 42/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1444 - accuracy: 0.9560 - val_loss: 0.0410 - val_accuracy: 0.9892 Epoch 43/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1308 - accuracy: 0.9609 - val_loss: 0.0404 - val_accuracy: 0.9892 Epoch 44/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1257 - accuracy: 0.9609 - val_loss: 0.0381 - val_accuracy: 0.9892 Epoch 45/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1297 - accuracy: 0.9580 - val_loss: 0.0405 - val_accuracy: 0.9892 Epoch 46/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1339 - accuracy: 0.9597 - val_loss: 0.0462 - val_accuracy: 0.9876 Epoch 47/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1265 - accuracy: 0.9620 - val_loss: 0.0442 - val_accuracy: 0.9876 Epoch 48/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1375 - accuracy: 0.9593 - val_loss: 0.0430 - val_accuracy: 0.9901 Epoch 49/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1192 - accuracy: 0.9651 - val_loss: 0.0466 - val_accuracy: 0.9884 Epoch 50/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1245 - accuracy: 0.9638 - val_loss: 0.0400 - val_accuracy: 0.9901 Epoch 51/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1179 - accuracy: 0.9644 - val_loss: 0.0411 - val_accuracy: 0.9876 Epoch 52/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1104 - accuracy: 0.9667 - val_loss: 0.0358 - val_accuracy: 0.9909 Epoch 53/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1113 - accuracy: 0.9677 - val_loss: 0.0373 - val_accuracy: 0.9909 Epoch 54/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1029 - accuracy: 0.9673 - val_loss: 0.0381 - val_accuracy: 0.9909 Epoch 55/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1145 - accuracy: 0.9620 - val_loss: 0.0472 - val_accuracy: 0.9884 Epoch 56/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1043 - accuracy: 0.9671 - val_loss: 0.0412 - val_accuracy: 0.9901 Epoch 57/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1155 - accuracy: 0.9669 - val_loss: 0.0329 - val_accuracy: 0.9917 Epoch 58/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1043 - accuracy: 0.9698 - val_loss: 0.0356 - val_accuracy: 0.9892 Epoch 59/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1039 - accuracy: 0.9659 - val_loss: 0.0355 - val_accuracy: 0.9917 Epoch 60/100 152/152 [==============================] - 0s 3ms/step - loss: 0.1023 - accuracy: 0.9682 - val_loss: 0.0358 - val_accuracy: 0.9917 Epoch 61/100 152/152 [==============================] - 0s 3ms/step - loss: 0.0988 - accuracy: 0.9704 - val_loss: 0.0352 - val_accuracy: 0.9901 Epoch 62/100 152/152 [==============================] - 0s 3ms/step - loss: 0.0961 - accuracy: 0.9690 - val_loss: 0.0282 - val_accuracy: 0.9901 Epoch 63/100 152/152 [==============================] - 0s 3ms/step - loss: 0.0980 - accuracy: 0.9706 - val_loss: 0.0303 - val_accuracy: 0.9909 Epoch 64/100 152/152 [==============================] - 1s 4ms/step - loss: 0.1044 - accuracy: 0.9702 - val_loss: 0.0370 - val_accuracy: 0.9901 Epoch 65/100 152/152 [==============================] - 1s 3ms/step - loss: 0.0894 - accuracy: 0.9742 - val_loss: 0.0378 - val_accuracy: 0.9901 Epoch 66/100 152/152 [==============================] - 0s 3ms/step - loss: 0.0880 - accuracy: 0.9713 - val_loss: 0.0391 - val_accuracy: 0.9901 Epoch 67/100 152/152 [==============================] - 0s 3ms/step - loss: 0.0949 - accuracy: 0.9739 - val_loss: 0.0390 - val_accuracy: 0.9917 Epoch 68/100 152/152 [==============================] - 0s 2ms/step - loss: 0.0886 - accuracy: 0.9729 - val_loss: 0.0334 - val_accuracy: 0.9917 Epoch 69/100 152/152 [==============================] - 0s 2ms/step - loss: 0.0775 - accuracy: 0.9785 - val_loss: 0.0296 - val_accuracy: 0.9926 Epoch 70/100 152/152 [==============================] - 0s 2ms/step - loss: 0.0859 - accuracy: 0.9754 - val_loss: 0.0349 - val_accuracy: 0.9884 Epoch 71/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1023 - accuracy: 0.9713 - val_loss: 0.0304 - val_accuracy: 0.9892 Epoch 72/100 152/152 [==============================] - 0s 2ms/step - loss: 0.0991 - accuracy: 0.9700 - val_loss: 0.0390 - val_accuracy: 0.9901 Epoch 73/100 152/152 [==============================] - 0s 2ms/step - loss: 0.0892 - accuracy: 0.9764 - val_loss: 0.0370 - val_accuracy: 0.9892 Epoch 74/100 152/152 [==============================] - 0s 2ms/step - loss: 0.0745 - accuracy: 0.9791 - val_loss: 0.0384 - val_accuracy: 0.9892 Epoch 75/100 152/152 [==============================] - 0s 2ms/step - loss: 0.0764 - accuracy: 0.9775 - val_loss: 0.0292 - val_accuracy: 0.9909 Epoch 76/100 152/152 [==============================] - 0s 2ms/step - loss: 0.0863 - accuracy: 0.9775 - val_loss: 0.0305 - val_accuracy: 0.9934 Epoch 77/100 152/152 [==============================] - 0s 2ms/step - loss: 0.0695 - accuracy: 0.9785 - val_loss: 0.0339 - val_accuracy: 0.9909 Epoch 78/100 152/152 [==============================] - 0s 2ms/step - loss: 0.0677 - accuracy: 0.9816 - val_loss: 0.0330 - val_accuracy: 0.9901 Epoch 79/100 152/152 [==============================] - 0s 2ms/step - loss: 0.0787 - accuracy: 0.9758 - val_loss: 0.0360 - val_accuracy: 0.9892 Epoch 80/100 152/152 [==============================] - 0s 2ms/step - loss: 0.0771 - accuracy: 0.9775 - val_loss: 0.0375 - val_accuracy: 0.9884 Epoch 81/100 152/152 [==============================] - 0s 2ms/step - loss: 0.0807 - accuracy: 0.9762 - val_loss: 0.0291 - val_accuracy: 0.9909 Epoch 82/100 152/152 [==============================] - 0s 2ms/step - loss: 0.0773 - accuracy: 0.9785 - val_loss: 0.0310 - val_accuracy: 0.9917 Epoch 83/100 152/152 [==============================] - 0s 2ms/step - loss: 0.0859 - accuracy: 0.9758 - val_loss: 0.0402 - val_accuracy: 0.9892 Epoch 84/100 152/152 [==============================] - 0s 2ms/step - loss: 0.0874 - accuracy: 0.9739 - val_loss: 0.0308 - val_accuracy: 0.9917 Epoch 85/100 152/152 [==============================] - 0s 2ms/step - loss: 0.0807 - accuracy: 0.9750 - val_loss: 0.0265 - val_accuracy: 0.9934 Epoch 86/100 152/152 [==============================] - 0s 2ms/step - loss: 0.0740 - accuracy: 0.9777 - val_loss: 0.0331 - val_accuracy: 0.9926 Epoch 87/100 152/152 [==============================] - 0s 2ms/step - loss: 0.0695 - accuracy: 0.9777 - val_loss: 0.0364 - val_accuracy: 0.9884 Epoch 88/100 152/152 [==============================] - 0s 2ms/step - loss: 0.0717 - accuracy: 0.9795 - val_loss: 0.0372 - val_accuracy: 0.9917 Epoch 89/100 152/152 [==============================] - 0s 2ms/step - loss: 0.0797 - accuracy: 0.9775 - val_loss: 0.0389 - val_accuracy: 0.9901 Epoch 90/100 152/152 [==============================] - 0s 2ms/step - loss: 0.0767 - accuracy: 0.9789 - val_loss: 0.0370 - val_accuracy: 0.9901 Epoch 91/100 152/152 [==============================] - 0s 2ms/step - loss: 0.0642 - accuracy: 0.9810 - val_loss: 0.0398 - val_accuracy: 0.9876 Epoch 92/100 152/152 [==============================] - 0s 3ms/step - loss: 0.0532 - accuracy: 0.9851 - val_loss: 0.0307 - val_accuracy: 0.9917 Epoch 93/100 152/152 [==============================] - 0s 2ms/step - loss: 0.0685 - accuracy: 0.9797 - val_loss: 0.0356 - val_accuracy: 0.9917 Epoch 94/100 152/152 [==============================] - 0s 2ms/step - loss: 0.0718 - accuracy: 0.9785 - val_loss: 0.0308 - val_accuracy: 0.9917 Epoch 95/100 152/152 [==============================] - 0s 3ms/step - loss: 0.0827 - accuracy: 0.9773 - val_loss: 0.0301 - val_accuracy: 0.9917 Epoch 96/100 152/152 [==============================] - 0s 2ms/step - loss: 0.0710 - accuracy: 0.9804 - val_loss: 0.0161 - val_accuracy: 0.9934 Epoch 97/100 152/152 [==============================] - 0s 2ms/step - loss: 0.0779 - accuracy: 0.9795 - val_loss: 0.0264 - val_accuracy: 0.9926 Epoch 98/100 152/152 [==============================] - 0s 2ms/step - loss: 0.0702 - accuracy: 0.9791 - val_loss: 0.0182 - val_accuracy: 0.9950 Epoch 99/100 152/152 [==============================] - 0s 2ms/step - loss: 0.0750 - accuracy: 0.9795 - val_loss: 0.0201 - val_accuracy: 0.9934 Epoch 100/100 152/152 [==============================] - 0s 2ms/step - loss: 0.0521 - accuracy: 0.9828 - val_loss: 0.0271 - val_accuracy: 0.9926
plot(history)
#creating Function to evaluate the model performace with the training data
def CRI_model_eval(model,Xtest,ytest,encoder):
ypred = model.predict(Xtest)
# prob = model.predict_proba(Xtest)
# print(prob)
ypred = [np.argmax(i) for i in ypred]
print('\nModel Evaluation:\n')
print('Accuracy Score: {} %'.format(round(accuracy_score(ypred,ytest)*100,2)))
print('Precision Score: {} %'.format(round(precision_score(ypred,ytest,average='weighted')*100,2)))
print('Recall Score: {} %'.format(round(recall_score(ypred,ytest,average='weighted')*100,2)))
print('F1 Score: {} %'.format(round(f1_score(ypred,ytest,average='weighted')*100,2)))
print("\nClassification Report:\n",classification_report(ypred,ytest))
cm = confusion_matrix(ypred,ytest)
plt.figure(figsize=(20,20))
sns.heatmap(cm,annot=True,
yticklabels = np.arange(0,33),
xticklabels = np.arange(0,33),
cmap='Blues')
plt.show()
# predicting the Top 3 Critical Risk
predictions = model.predict(Xtest)
preds_idx = np.argsort(-predictions)
result = []
for i in range(0,len(preds_idx)):
result.append(cri_encoder.inverse_transform(preds_idx[i][:3]))
return result
result = CRI_model_eval(model,Xtest_scale,ytest,cri_encoder)
48/48 [==============================] - 0s 1ms/step
Model Evaluation:
Accuracy Score: 99.47 %
Precision Score: 99.56 %
Recall Score: 99.47 %
F1 Score: 99.49 %
Classification Report:
precision recall f1-score support
0 1.00 1.00 1.00 43
1 1.00 1.00 1.00 42
2 1.00 1.00 1.00 38
3 1.00 1.00 1.00 48
4 1.00 0.97 0.99 39
5 1.00 1.00 1.00 39
6 1.00 1.00 1.00 47
7 1.00 1.00 1.00 63
8 1.00 1.00 1.00 47
9 1.00 0.98 0.99 58
10 1.00 1.00 1.00 56
11 1.00 0.96 0.98 47
12 1.00 1.00 1.00 40
13 1.00 1.00 1.00 36
14 1.00 1.00 1.00 42
15 1.00 0.91 0.95 45
16 0.83 1.00 0.91 40
17 1.00 1.00 1.00 34
18 1.00 1.00 1.00 43
19 1.00 1.00 1.00 36
20 1.00 1.00 1.00 44
21 1.00 1.00 1.00 41
22 1.00 1.00 1.00 47
23 1.00 1.00 1.00 50
24 1.00 1.00 1.00 51
25 1.00 1.00 1.00 52
26 1.00 1.00 1.00 61
27 1.00 1.00 1.00 48
28 1.00 1.00 1.00 38
29 1.00 1.00 1.00 43
30 1.00 1.00 1.00 55
31 1.00 1.00 1.00 44
32 1.00 1.00 1.00 55
accuracy 0.99 1512
macro avg 0.99 0.99 0.99 1512
weighted avg 1.00 0.99 0.99 1512
48/48 [==============================] - 0s 1ms/step
2. As per the Project objective, We are Considering the Incident Description as the independent variable based on that we will predict the Accident Level and Critical Risk Using ANN
2.1 Predicting the Accident Level by Description using ANN
data = pd.read_csv('Cleaned Data.csv')
X = data['Cleaned_Description']
Glove Word Embedding
embeddings = {}
with open("glove.6B.200d.txt", 'r', encoding="utf-8") as f:
for line in f:
values = line.split()
word = values[0]
vector = np.asarray(values[1:], "float32")
embeddings[word] = vector
def sen2vec(sentence):
words = [word for word in sentence if word.isalpha()]
m = []
for w in words:
try:
m.append(embeddings[w])
except:
continue
m=np.array(m)
v=m.sum(axis=0)
if type(v) != np.ndarray:
return np.zeros(300)
return v/np.sqrt((v**2).sum())
modeldata_glove = [sen2vec(x) for x in X]
df_golve = pd.DataFrame(modeldata_glove)
X = df_golve
y = data['Accident Level']
Xtrain,Xval,Xtest,ytrain,yval,ytest = ACC_data_split(X,y)
Independent Variables: The X-Train Data as (988, 200) The X-Validation Data as (248, 200) The X-Test Data as (309, 200) Target Variables: The y-Train Data as (988,) The y-Validation Data as (248,) The y-Test Data as (309,)
#converting the target variable from numeric to categorical
ytrain_label = to_categorical(ytrain)
yval_label = to_categorical(yval)
#Preparining the independent variables for NN
scaler = StandardScaler()
Xtrain_scale = scaler.fit_transform(Xtrain)
Xval_scale = scaler.fit_transform(Xval)
Xtest_scale =scaler.fit_transform(Xtest)
model,history = base_model(Xtrain_scale, ytrain_label,Xval_scale, yval_label)
Epoch 1/100 31/31 [==============================] - 1s 7ms/step - loss: 1.7262 - accuracy: 0.2672 - val_loss: 1.4049 - val_accuracy: 0.4597 Epoch 2/100 31/31 [==============================] - 0s 2ms/step - loss: 1.4431 - accuracy: 0.3957 - val_loss: 1.2740 - val_accuracy: 0.5242 Epoch 3/100 31/31 [==============================] - 0s 2ms/step - loss: 1.3148 - accuracy: 0.4626 - val_loss: 1.1226 - val_accuracy: 0.6169 Epoch 4/100 31/31 [==============================] - 0s 3ms/step - loss: 1.1754 - accuracy: 0.5071 - val_loss: 1.0015 - val_accuracy: 0.6774 Epoch 5/100 31/31 [==============================] - 0s 3ms/step - loss: 1.0891 - accuracy: 0.5789 - val_loss: 0.8628 - val_accuracy: 0.7339 Epoch 6/100 31/31 [==============================] - 0s 3ms/step - loss: 0.9776 - accuracy: 0.6113 - val_loss: 0.7690 - val_accuracy: 0.7621 Epoch 7/100 31/31 [==============================] - 0s 3ms/step - loss: 0.9152 - accuracy: 0.6397 - val_loss: 0.7107 - val_accuracy: 0.7782 Epoch 8/100 31/31 [==============================] - 0s 3ms/step - loss: 0.8413 - accuracy: 0.6609 - val_loss: 0.6510 - val_accuracy: 0.7944 Epoch 9/100 31/31 [==============================] - 0s 3ms/step - loss: 0.7769 - accuracy: 0.7055 - val_loss: 0.5886 - val_accuracy: 0.7984 Epoch 10/100 31/31 [==============================] - 0s 3ms/step - loss: 0.7462 - accuracy: 0.7166 - val_loss: 0.5287 - val_accuracy: 0.8226 Epoch 11/100 31/31 [==============================] - 0s 3ms/step - loss: 0.6773 - accuracy: 0.7348 - val_loss: 0.4989 - val_accuracy: 0.8226 Epoch 12/100 31/31 [==============================] - 0s 3ms/step - loss: 0.6888 - accuracy: 0.7429 - val_loss: 0.4992 - val_accuracy: 0.8266 Epoch 13/100 31/31 [==============================] - 0s 2ms/step - loss: 0.6247 - accuracy: 0.7692 - val_loss: 0.4684 - val_accuracy: 0.8226 Epoch 14/100 31/31 [==============================] - 0s 3ms/step - loss: 0.5827 - accuracy: 0.7854 - val_loss: 0.4828 - val_accuracy: 0.8387 Epoch 15/100 31/31 [==============================] - 0s 3ms/step - loss: 0.5428 - accuracy: 0.7844 - val_loss: 0.4182 - val_accuracy: 0.8589 Epoch 16/100 31/31 [==============================] - 0s 3ms/step - loss: 0.5256 - accuracy: 0.8067 - val_loss: 0.4236 - val_accuracy: 0.8750 Epoch 17/100 31/31 [==============================] - 0s 3ms/step - loss: 0.5020 - accuracy: 0.8016 - val_loss: 0.4004 - val_accuracy: 0.8669 Epoch 18/100 31/31 [==============================] - 0s 3ms/step - loss: 0.5126 - accuracy: 0.8097 - val_loss: 0.3985 - val_accuracy: 0.8750 Epoch 19/100 31/31 [==============================] - 0s 3ms/step - loss: 0.4805 - accuracy: 0.8168 - val_loss: 0.3508 - val_accuracy: 0.8911 Epoch 20/100 31/31 [==============================] - 0s 3ms/step - loss: 0.4756 - accuracy: 0.8229 - val_loss: 0.3663 - val_accuracy: 0.8790 Epoch 21/100 31/31 [==============================] - 0s 2ms/step - loss: 0.4287 - accuracy: 0.8441 - val_loss: 0.3690 - val_accuracy: 0.8750 Epoch 22/100 31/31 [==============================] - 0s 3ms/step - loss: 0.4350 - accuracy: 0.8411 - val_loss: 0.3166 - val_accuracy: 0.9153 Epoch 23/100 31/31 [==============================] - 0s 3ms/step - loss: 0.4265 - accuracy: 0.8431 - val_loss: 0.3465 - val_accuracy: 0.8992 Epoch 24/100 31/31 [==============================] - 0s 3ms/step - loss: 0.3958 - accuracy: 0.8573 - val_loss: 0.3322 - val_accuracy: 0.9032 Epoch 25/100 31/31 [==============================] - 0s 3ms/step - loss: 0.3920 - accuracy: 0.8603 - val_loss: 0.3376 - val_accuracy: 0.8911 Epoch 26/100 31/31 [==============================] - 0s 2ms/step - loss: 0.3812 - accuracy: 0.8613 - val_loss: 0.3024 - val_accuracy: 0.9073 Epoch 27/100 31/31 [==============================] - 0s 3ms/step - loss: 0.3574 - accuracy: 0.8674 - val_loss: 0.3148 - val_accuracy: 0.8831 Epoch 28/100 31/31 [==============================] - 0s 3ms/step - loss: 0.3791 - accuracy: 0.8623 - val_loss: 0.3066 - val_accuracy: 0.8952 Epoch 29/100 31/31 [==============================] - 0s 3ms/step - loss: 0.4004 - accuracy: 0.8472 - val_loss: 0.2730 - val_accuracy: 0.9073 Epoch 30/100 31/31 [==============================] - 0s 3ms/step - loss: 0.3872 - accuracy: 0.8603 - val_loss: 0.2992 - val_accuracy: 0.8831 Epoch 31/100 31/31 [==============================] - 0s 2ms/step - loss: 0.3363 - accuracy: 0.8806 - val_loss: 0.2771 - val_accuracy: 0.9073 Epoch 32/100 31/31 [==============================] - 0s 3ms/step - loss: 0.3319 - accuracy: 0.8755 - val_loss: 0.2795 - val_accuracy: 0.9073 Epoch 33/100 31/31 [==============================] - 0s 3ms/step - loss: 0.3257 - accuracy: 0.8887 - val_loss: 0.3049 - val_accuracy: 0.8992 Epoch 34/100 31/31 [==============================] - 0s 2ms/step - loss: 0.3136 - accuracy: 0.8957 - val_loss: 0.3177 - val_accuracy: 0.8952 Epoch 35/100 31/31 [==============================] - 0s 3ms/step - loss: 0.3070 - accuracy: 0.8998 - val_loss: 0.2877 - val_accuracy: 0.9032 Epoch 36/100 31/31 [==============================] - 0s 3ms/step - loss: 0.3019 - accuracy: 0.8826 - val_loss: 0.2941 - val_accuracy: 0.9113 Epoch 37/100 31/31 [==============================] - 0s 3ms/step - loss: 0.3105 - accuracy: 0.8927 - val_loss: 0.2868 - val_accuracy: 0.8952 Epoch 38/100 31/31 [==============================] - 0s 3ms/step - loss: 0.3005 - accuracy: 0.8826 - val_loss: 0.2646 - val_accuracy: 0.9032 Epoch 39/100 31/31 [==============================] - 0s 3ms/step - loss: 0.2856 - accuracy: 0.8937 - val_loss: 0.2639 - val_accuracy: 0.8952 Epoch 40/100 31/31 [==============================] - 0s 3ms/step - loss: 0.2678 - accuracy: 0.9028 - val_loss: 0.2346 - val_accuracy: 0.9153 Epoch 41/100 31/31 [==============================] - 0s 3ms/step - loss: 0.3163 - accuracy: 0.8785 - val_loss: 0.3038 - val_accuracy: 0.8710 Epoch 42/100 31/31 [==============================] - 0s 3ms/step - loss: 0.2850 - accuracy: 0.8968 - val_loss: 0.2725 - val_accuracy: 0.8952 Epoch 43/100 31/31 [==============================] - 0s 3ms/step - loss: 0.2663 - accuracy: 0.9049 - val_loss: 0.2394 - val_accuracy: 0.9113 Epoch 44/100 31/31 [==============================] - 0s 3ms/step - loss: 0.2647 - accuracy: 0.8988 - val_loss: 0.2619 - val_accuracy: 0.9073 Epoch 45/100 31/31 [==============================] - 0s 3ms/step - loss: 0.2433 - accuracy: 0.9079 - val_loss: 0.2775 - val_accuracy: 0.8992 Epoch 46/100 31/31 [==============================] - 0s 3ms/step - loss: 0.2675 - accuracy: 0.8998 - val_loss: 0.2171 - val_accuracy: 0.9234 Epoch 47/100 31/31 [==============================] - 0s 4ms/step - loss: 0.2289 - accuracy: 0.9231 - val_loss: 0.2420 - val_accuracy: 0.9073 Epoch 48/100 31/31 [==============================] - 0s 4ms/step - loss: 0.2681 - accuracy: 0.9018 - val_loss: 0.2198 - val_accuracy: 0.9113 Epoch 49/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1998 - accuracy: 0.9312 - val_loss: 0.2269 - val_accuracy: 0.9153 Epoch 50/100 31/31 [==============================] - 0s 2ms/step - loss: 0.2216 - accuracy: 0.9211 - val_loss: 0.2024 - val_accuracy: 0.9234 Epoch 51/100 31/31 [==============================] - 0s 3ms/step - loss: 0.2456 - accuracy: 0.9231 - val_loss: 0.1905 - val_accuracy: 0.9274 Epoch 52/100 31/31 [==============================] - 0s 2ms/step - loss: 0.1946 - accuracy: 0.9261 - val_loss: 0.2448 - val_accuracy: 0.9113 Epoch 53/100 31/31 [==============================] - 0s 3ms/step - loss: 0.2494 - accuracy: 0.9059 - val_loss: 0.2282 - val_accuracy: 0.9153 Epoch 54/100 31/31 [==============================] - 0s 3ms/step - loss: 0.2560 - accuracy: 0.9150 - val_loss: 0.2311 - val_accuracy: 0.9194 Epoch 55/100 31/31 [==============================] - 0s 2ms/step - loss: 0.2101 - accuracy: 0.9251 - val_loss: 0.2166 - val_accuracy: 0.9194 Epoch 56/100 31/31 [==============================] - 0s 2ms/step - loss: 0.2069 - accuracy: 0.9160 - val_loss: 0.1994 - val_accuracy: 0.9153 Epoch 57/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1694 - accuracy: 0.9393 - val_loss: 0.1811 - val_accuracy: 0.9315 Epoch 58/100 31/31 [==============================] - 0s 4ms/step - loss: 0.2555 - accuracy: 0.9119 - val_loss: 0.2057 - val_accuracy: 0.9113 Epoch 59/100 31/31 [==============================] - 0s 4ms/step - loss: 0.2287 - accuracy: 0.9271 - val_loss: 0.2454 - val_accuracy: 0.9113 Epoch 60/100 31/31 [==============================] - 0s 3ms/step - loss: 0.2000 - accuracy: 0.9322 - val_loss: 0.2174 - val_accuracy: 0.9153 Epoch 61/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1755 - accuracy: 0.9423 - val_loss: 0.2223 - val_accuracy: 0.9073 Epoch 62/100 31/31 [==============================] - 0s 3ms/step - loss: 0.2003 - accuracy: 0.9231 - val_loss: 0.2244 - val_accuracy: 0.9113 Epoch 63/100 31/31 [==============================] - 0s 4ms/step - loss: 0.1916 - accuracy: 0.9281 - val_loss: 0.2106 - val_accuracy: 0.9274 Epoch 64/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1819 - accuracy: 0.9443 - val_loss: 0.1874 - val_accuracy: 0.9194 Epoch 65/100 31/31 [==============================] - 0s 4ms/step - loss: 0.1892 - accuracy: 0.9413 - val_loss: 0.1881 - val_accuracy: 0.9315 Epoch 66/100 31/31 [==============================] - 0s 4ms/step - loss: 0.1757 - accuracy: 0.9524 - val_loss: 0.2279 - val_accuracy: 0.9234 Epoch 67/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1540 - accuracy: 0.9433 - val_loss: 0.2542 - val_accuracy: 0.9153 Epoch 68/100 31/31 [==============================] - 0s 4ms/step - loss: 0.1848 - accuracy: 0.9383 - val_loss: 0.2279 - val_accuracy: 0.9274 Epoch 69/100 31/31 [==============================] - 0s 4ms/step - loss: 0.1724 - accuracy: 0.9443 - val_loss: 0.1898 - val_accuracy: 0.9234 Epoch 70/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1637 - accuracy: 0.9474 - val_loss: 0.2207 - val_accuracy: 0.9194 Epoch 71/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1885 - accuracy: 0.9352 - val_loss: 0.2138 - val_accuracy: 0.9355 Epoch 72/100 31/31 [==============================] - 0s 4ms/step - loss: 0.1578 - accuracy: 0.9474 - val_loss: 0.2399 - val_accuracy: 0.9113 Epoch 73/100 31/31 [==============================] - 0s 4ms/step - loss: 0.1784 - accuracy: 0.9504 - val_loss: 0.2086 - val_accuracy: 0.9234 Epoch 74/100 31/31 [==============================] - 0s 3ms/step - loss: 0.2018 - accuracy: 0.9322 - val_loss: 0.2393 - val_accuracy: 0.9032 Epoch 75/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1570 - accuracy: 0.9423 - val_loss: 0.1729 - val_accuracy: 0.9274 Epoch 76/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1381 - accuracy: 0.9514 - val_loss: 0.2242 - val_accuracy: 0.9153 Epoch 77/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1496 - accuracy: 0.9464 - val_loss: 0.2307 - val_accuracy: 0.9153 Epoch 78/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1438 - accuracy: 0.9514 - val_loss: 0.2795 - val_accuracy: 0.9073 Epoch 79/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1666 - accuracy: 0.9453 - val_loss: 0.2088 - val_accuracy: 0.9194 Epoch 80/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1330 - accuracy: 0.9534 - val_loss: 0.2134 - val_accuracy: 0.9194 Epoch 81/100 31/31 [==============================] - 0s 4ms/step - loss: 0.1299 - accuracy: 0.9514 - val_loss: 0.2176 - val_accuracy: 0.9274 Epoch 82/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1186 - accuracy: 0.9615 - val_loss: 0.2203 - val_accuracy: 0.9395 Epoch 83/100 31/31 [==============================] - 0s 2ms/step - loss: 0.1731 - accuracy: 0.9433 - val_loss: 0.2165 - val_accuracy: 0.9274 Epoch 84/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1660 - accuracy: 0.9403 - val_loss: 0.2057 - val_accuracy: 0.9355 Epoch 85/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1605 - accuracy: 0.9504 - val_loss: 0.2137 - val_accuracy: 0.9234 Epoch 86/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1428 - accuracy: 0.9555 - val_loss: 0.2166 - val_accuracy: 0.9274 Epoch 87/100 31/31 [==============================] - 0s 2ms/step - loss: 0.1468 - accuracy: 0.9464 - val_loss: 0.1804 - val_accuracy: 0.9395 Epoch 88/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1207 - accuracy: 0.9494 - val_loss: 0.2094 - val_accuracy: 0.9274 Epoch 89/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1507 - accuracy: 0.9555 - val_loss: 0.2009 - val_accuracy: 0.9234 Epoch 90/100 31/31 [==============================] - 0s 2ms/step - loss: 0.1438 - accuracy: 0.9504 - val_loss: 0.1830 - val_accuracy: 0.9355 Epoch 91/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1776 - accuracy: 0.9514 - val_loss: 0.2478 - val_accuracy: 0.9073 Epoch 92/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1555 - accuracy: 0.9534 - val_loss: 0.1760 - val_accuracy: 0.9315 Epoch 93/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1095 - accuracy: 0.9636 - val_loss: 0.2241 - val_accuracy: 0.9234 Epoch 94/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1306 - accuracy: 0.9555 - val_loss: 0.2554 - val_accuracy: 0.9153 Epoch 95/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1140 - accuracy: 0.9605 - val_loss: 0.2493 - val_accuracy: 0.9234 Epoch 96/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1064 - accuracy: 0.9636 - val_loss: 0.2673 - val_accuracy: 0.9234 Epoch 97/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1545 - accuracy: 0.9565 - val_loss: 0.2002 - val_accuracy: 0.9395 Epoch 98/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1383 - accuracy: 0.9585 - val_loss: 0.2338 - val_accuracy: 0.9274 Epoch 99/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1112 - accuracy: 0.9646 - val_loss: 0.2223 - val_accuracy: 0.9395 Epoch 100/100 31/31 [==============================] - 0s 3ms/step - loss: 0.1186 - accuracy: 0.9676 - val_loss: 0.2509 - val_accuracy: 0.9355
plot(history)
result = ACC_model_eval(model,Xtest_scale,ytest)
10/10 [==============================] - 0s 2ms/step
Model Evaluation:
Accuracy Score: 95.79 %
Precision Score: 96.21 %
Recall Score: 95.79 %
F1 Score: 95.89 %
Classification Report:
precision recall f1-score support
0 0.82 0.96 0.89 49
1 0.98 0.91 0.95 57
2 0.98 0.97 0.98 61
3 0.99 0.96 0.97 79
4 1.00 0.98 0.99 63
accuracy 0.96 309
macro avg 0.96 0.96 0.95 309
weighted avg 0.96 0.96 0.96 309
Confusion Matrix:
2.B Predicting the Critical Risk by Description using ANN
X = df_golve.join(data['Accident Level'])
X.columns = X.columns.astype(str)
cri_encoder = LabelEncoder()
y = cri_encoder.fit_transform(critical_risk)
Xtrain,Xval,Xtest,ytrain,yval,ytest = CRI_data_split(X,y)
Independent Variables: The X-Train Data as (4836, 201) The X-Validation Data as (1209, 201) The X-Test Data as (1512, 201) Target Variables: The y-Train Data as (4836,) The y-Validation Data as (1209,) The y-Test Data as (1512,)
#converting the target variable from numeric to categorical
ytrain_label = to_categorical(ytrain)
yval_label = to_categorical(yval)
scaler = StandardScaler()
Xtrain_scale = scaler.fit_transform(Xtrain)
Xval_scale = scaler.fit_transform(Xval)
Xtest_scale =scaler.fit_transform(Xtest)
model,history = base_model(Xtrain_scale, ytrain_label,Xval_scale, yval_label)
Epoch 1/100 152/152 [==============================] - 1s 3ms/step - loss: 2.8648 - accuracy: 0.2355 - val_loss: 1.5983 - val_accuracy: 0.6749 Epoch 2/100 152/152 [==============================] - 0s 2ms/step - loss: 1.8505 - accuracy: 0.4911 - val_loss: 0.9078 - val_accuracy: 0.7916 Epoch 3/100 152/152 [==============================] - 0s 2ms/step - loss: 1.3548 - accuracy: 0.6210 - val_loss: 0.6534 - val_accuracy: 0.8470 Epoch 4/100 152/152 [==============================] - 0s 2ms/step - loss: 1.1354 - accuracy: 0.6778 - val_loss: 0.5108 - val_accuracy: 0.8660 Epoch 5/100 152/152 [==============================] - 0s 2ms/step - loss: 0.9622 - accuracy: 0.7155 - val_loss: 0.4313 - val_accuracy: 0.8900 Epoch 6/100 152/152 [==============================] - 0s 2ms/step - loss: 0.8917 - accuracy: 0.7341 - val_loss: 0.3757 - val_accuracy: 0.9082 Epoch 7/100 152/152 [==============================] - 0s 2ms/step - loss: 0.7877 - accuracy: 0.7593 - val_loss: 0.3241 - val_accuracy: 0.9313 Epoch 8/100 152/152 [==============================] - 0s 2ms/step - loss: 0.7430 - accuracy: 0.7781 - val_loss: 0.3010 - val_accuracy: 0.9181 Epoch 9/100 152/152 [==============================] - 0s 2ms/step - loss: 0.6734 - accuracy: 0.7984 - val_loss: 0.2636 - val_accuracy: 0.9338 Epoch 10/100 152/152 [==============================] - 0s 3ms/step - loss: 0.6459 - accuracy: 0.8054 - val_loss: 0.2356 - val_accuracy: 0.9487 Epoch 11/100 152/152 [==============================] - 0s 3ms/step - loss: 0.5717 - accuracy: 0.8182 - val_loss: 0.2148 - val_accuracy: 0.9446 Epoch 12/100 152/152 [==============================] - 0s 3ms/step - loss: 0.5360 - accuracy: 0.8342 - val_loss: 0.1975 - val_accuracy: 0.9578 Epoch 13/100 152/152 [==============================] - 0s 3ms/step - loss: 0.5036 - accuracy: 0.8459 - val_loss: 0.1791 - val_accuracy: 0.9653 Epoch 14/100 152/152 [==============================] - 0s 3ms/step - loss: 0.4878 - accuracy: 0.8464 - val_loss: 0.1750 - val_accuracy: 0.9611 Epoch 15/100 152/152 [==============================] - 0s 3ms/step - loss: 0.4729 - accuracy: 0.8497 - val_loss: 0.1649 - val_accuracy: 0.9661 Epoch 16/100 152/152 [==============================] - 0s 3ms/step - loss: 0.4434 - accuracy: 0.8685 - val_loss: 0.1501 - val_accuracy: 0.9562 Epoch 17/100 152/152 [==============================] - 0s 3ms/step - loss: 0.4211 - accuracy: 0.8650 - val_loss: 0.1395 - val_accuracy: 0.9669 Epoch 18/100 152/152 [==============================] - 0s 3ms/step - loss: 0.4120 - accuracy: 0.8660 - val_loss: 0.1255 - val_accuracy: 0.9719 Epoch 19/100 152/152 [==============================] - 0s 3ms/step - loss: 0.3931 - accuracy: 0.8722 - val_loss: 0.1207 - val_accuracy: 0.9686 Epoch 20/100 152/152 [==============================] - 0s 3ms/step - loss: 0.3833 - accuracy: 0.8803 - val_loss: 0.1152 - val_accuracy: 0.9694 Epoch 21/100 152/152 [==============================] - 0s 3ms/step - loss: 0.3605 - accuracy: 0.8854 - val_loss: 0.1125 - val_accuracy: 0.9694 Epoch 22/100 152/152 [==============================] - 0s 3ms/step - loss: 0.3724 - accuracy: 0.8877 - val_loss: 0.1160 - val_accuracy: 0.9702 Epoch 23/100 152/152 [==============================] - 0s 3ms/step - loss: 0.3548 - accuracy: 0.8863 - val_loss: 0.1058 - val_accuracy: 0.9677 Epoch 24/100 152/152 [==============================] - 0s 3ms/step - loss: 0.3312 - accuracy: 0.8931 - val_loss: 0.1042 - val_accuracy: 0.9677 Epoch 25/100 152/152 [==============================] - 0s 3ms/step - loss: 0.3307 - accuracy: 0.8978 - val_loss: 0.1002 - val_accuracy: 0.9735 Epoch 26/100 152/152 [==============================] - 0s 3ms/step - loss: 0.2921 - accuracy: 0.9115 - val_loss: 0.0914 - val_accuracy: 0.9677 Epoch 27/100 152/152 [==============================] - 0s 3ms/step - loss: 0.2871 - accuracy: 0.9107 - val_loss: 0.0916 - val_accuracy: 0.9727 Epoch 28/100 152/152 [==============================] - 0s 3ms/step - loss: 0.2965 - accuracy: 0.9041 - val_loss: 0.0921 - val_accuracy: 0.9735 Epoch 29/100 152/152 [==============================] - 0s 3ms/step - loss: 0.3008 - accuracy: 0.9076 - val_loss: 0.0849 - val_accuracy: 0.9752 Epoch 30/100 152/152 [==============================] - 0s 3ms/step - loss: 0.2967 - accuracy: 0.9067 - val_loss: 0.0897 - val_accuracy: 0.9752 Epoch 31/100 152/152 [==============================] - 0s 2ms/step - loss: 0.2833 - accuracy: 0.9082 - val_loss: 0.0834 - val_accuracy: 0.9735 Epoch 32/100 152/152 [==============================] - 0s 3ms/step - loss: 0.2656 - accuracy: 0.9152 - val_loss: 0.0795 - val_accuracy: 0.9760 Epoch 33/100 152/152 [==============================] - 0s 3ms/step - loss: 0.2500 - accuracy: 0.9241 - val_loss: 0.0862 - val_accuracy: 0.9752 Epoch 34/100 152/152 [==============================] - 0s 3ms/step - loss: 0.2494 - accuracy: 0.9208 - val_loss: 0.0805 - val_accuracy: 0.9711 Epoch 35/100 152/152 [==============================] - 0s 3ms/step - loss: 0.2385 - accuracy: 0.9260 - val_loss: 0.0726 - val_accuracy: 0.9810 Epoch 36/100 152/152 [==============================] - 0s 3ms/step - loss: 0.2327 - accuracy: 0.9235 - val_loss: 0.0722 - val_accuracy: 0.9777 Epoch 37/100 152/152 [==============================] - 0s 2ms/step - loss: 0.2550 - accuracy: 0.9222 - val_loss: 0.0735 - val_accuracy: 0.9801 Epoch 38/100 152/152 [==============================] - 0s 2ms/step - loss: 0.2406 - accuracy: 0.9262 - val_loss: 0.0718 - val_accuracy: 0.9793 Epoch 39/100 152/152 [==============================] - 0s 3ms/step - loss: 0.2153 - accuracy: 0.9316 - val_loss: 0.0665 - val_accuracy: 0.9826 Epoch 40/100 152/152 [==============================] - 0s 2ms/step - loss: 0.2165 - accuracy: 0.9305 - val_loss: 0.0677 - val_accuracy: 0.9818 Epoch 41/100 152/152 [==============================] - 0s 2ms/step - loss: 0.2253 - accuracy: 0.9305 - val_loss: 0.0729 - val_accuracy: 0.9768 Epoch 42/100 152/152 [==============================] - 0s 3ms/step - loss: 0.2216 - accuracy: 0.9316 - val_loss: 0.0702 - val_accuracy: 0.9785 Epoch 43/100 152/152 [==============================] - 0s 2ms/step - loss: 0.2325 - accuracy: 0.9293 - val_loss: 0.0660 - val_accuracy: 0.9843 Epoch 44/100 152/152 [==============================] - 0s 2ms/step - loss: 0.2145 - accuracy: 0.9324 - val_loss: 0.0674 - val_accuracy: 0.9801 Epoch 45/100 152/152 [==============================] - 0s 2ms/step - loss: 0.2226 - accuracy: 0.9347 - val_loss: 0.0586 - val_accuracy: 0.9843 Epoch 46/100 152/152 [==============================] - 0s 2ms/step - loss: 0.2037 - accuracy: 0.9355 - val_loss: 0.0545 - val_accuracy: 0.9843 Epoch 47/100 152/152 [==============================] - 0s 2ms/step - loss: 0.2189 - accuracy: 0.9326 - val_loss: 0.0613 - val_accuracy: 0.9801 Epoch 48/100 152/152 [==============================] - 0s 2ms/step - loss: 0.2091 - accuracy: 0.9336 - val_loss: 0.0666 - val_accuracy: 0.9793 Epoch 49/100 152/152 [==============================] - 0s 3ms/step - loss: 0.2081 - accuracy: 0.9351 - val_loss: 0.0579 - val_accuracy: 0.9826 Epoch 50/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1895 - accuracy: 0.9380 - val_loss: 0.0529 - val_accuracy: 0.9801 Epoch 51/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1956 - accuracy: 0.9371 - val_loss: 0.0570 - val_accuracy: 0.9835 Epoch 52/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1828 - accuracy: 0.9427 - val_loss: 0.0649 - val_accuracy: 0.9810 Epoch 53/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1788 - accuracy: 0.9417 - val_loss: 0.0574 - val_accuracy: 0.9826 Epoch 54/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1889 - accuracy: 0.9409 - val_loss: 0.0556 - val_accuracy: 0.9818 Epoch 55/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1709 - accuracy: 0.9469 - val_loss: 0.0600 - val_accuracy: 0.9826 Epoch 56/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1781 - accuracy: 0.9471 - val_loss: 0.0508 - val_accuracy: 0.9851 Epoch 57/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1769 - accuracy: 0.9479 - val_loss: 0.0551 - val_accuracy: 0.9826 Epoch 58/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1680 - accuracy: 0.9500 - val_loss: 0.0532 - val_accuracy: 0.9810 Epoch 59/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1709 - accuracy: 0.9456 - val_loss: 0.0482 - val_accuracy: 0.9835 Epoch 60/100 152/152 [==============================] - 0s 3ms/step - loss: 0.1848 - accuracy: 0.9446 - val_loss: 0.0513 - val_accuracy: 0.9843 Epoch 61/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1756 - accuracy: 0.9433 - val_loss: 0.0507 - val_accuracy: 0.9835 Epoch 62/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1598 - accuracy: 0.9469 - val_loss: 0.0548 - val_accuracy: 0.9835 Epoch 63/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1605 - accuracy: 0.9500 - val_loss: 0.0521 - val_accuracy: 0.9801 Epoch 64/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1693 - accuracy: 0.9475 - val_loss: 0.0524 - val_accuracy: 0.9810 Epoch 65/100 152/152 [==============================] - 0s 3ms/step - loss: 0.1434 - accuracy: 0.9547 - val_loss: 0.0460 - val_accuracy: 0.9859 Epoch 66/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1529 - accuracy: 0.9533 - val_loss: 0.0534 - val_accuracy: 0.9835 Epoch 67/100 152/152 [==============================] - 0s 3ms/step - loss: 0.1532 - accuracy: 0.9539 - val_loss: 0.0535 - val_accuracy: 0.9876 Epoch 68/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1532 - accuracy: 0.9537 - val_loss: 0.0474 - val_accuracy: 0.9851 Epoch 69/100 152/152 [==============================] - 0s 3ms/step - loss: 0.1444 - accuracy: 0.9572 - val_loss: 0.0486 - val_accuracy: 0.9851 Epoch 70/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1319 - accuracy: 0.9617 - val_loss: 0.0462 - val_accuracy: 0.9851 Epoch 71/100 152/152 [==============================] - 0s 3ms/step - loss: 0.1678 - accuracy: 0.9487 - val_loss: 0.0435 - val_accuracy: 0.9859 Epoch 72/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1571 - accuracy: 0.9498 - val_loss: 0.0506 - val_accuracy: 0.9810 Epoch 73/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1526 - accuracy: 0.9524 - val_loss: 0.0451 - val_accuracy: 0.9843 Epoch 74/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1359 - accuracy: 0.9578 - val_loss: 0.0438 - val_accuracy: 0.9851 Epoch 75/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1498 - accuracy: 0.9516 - val_loss: 0.0452 - val_accuracy: 0.9843 Epoch 76/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1311 - accuracy: 0.9605 - val_loss: 0.0450 - val_accuracy: 0.9843 Epoch 77/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1483 - accuracy: 0.9576 - val_loss: 0.0494 - val_accuracy: 0.9835 Epoch 78/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1546 - accuracy: 0.9526 - val_loss: 0.0511 - val_accuracy: 0.9835 Epoch 79/100 152/152 [==============================] - 0s 3ms/step - loss: 0.1418 - accuracy: 0.9541 - val_loss: 0.0581 - val_accuracy: 0.9801 Epoch 80/100 152/152 [==============================] - 0s 3ms/step - loss: 0.1221 - accuracy: 0.9605 - val_loss: 0.0458 - val_accuracy: 0.9843 Epoch 81/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1106 - accuracy: 0.9634 - val_loss: 0.0394 - val_accuracy: 0.9884 Epoch 82/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1326 - accuracy: 0.9582 - val_loss: 0.0507 - val_accuracy: 0.9818 Epoch 83/100 152/152 [==============================] - 0s 3ms/step - loss: 0.1298 - accuracy: 0.9580 - val_loss: 0.0445 - val_accuracy: 0.9851 Epoch 84/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1083 - accuracy: 0.9667 - val_loss: 0.0497 - val_accuracy: 0.9835 Epoch 85/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1334 - accuracy: 0.9593 - val_loss: 0.0533 - val_accuracy: 0.9843 Epoch 86/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1300 - accuracy: 0.9580 - val_loss: 0.0483 - val_accuracy: 0.9835 Epoch 87/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1262 - accuracy: 0.9615 - val_loss: 0.0491 - val_accuracy: 0.9826 Epoch 88/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1204 - accuracy: 0.9624 - val_loss: 0.0564 - val_accuracy: 0.9826 Epoch 89/100 152/152 [==============================] - 0s 3ms/step - loss: 0.1218 - accuracy: 0.9613 - val_loss: 0.0468 - val_accuracy: 0.9876 Epoch 90/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1357 - accuracy: 0.9589 - val_loss: 0.0492 - val_accuracy: 0.9835 Epoch 91/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1053 - accuracy: 0.9648 - val_loss: 0.0416 - val_accuracy: 0.9901 Epoch 92/100 152/152 [==============================] - 0s 3ms/step - loss: 0.1270 - accuracy: 0.9644 - val_loss: 0.0440 - val_accuracy: 0.9859 Epoch 93/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1154 - accuracy: 0.9634 - val_loss: 0.0466 - val_accuracy: 0.9876 Epoch 94/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1259 - accuracy: 0.9601 - val_loss: 0.0458 - val_accuracy: 0.9843 Epoch 95/100 152/152 [==============================] - 0s 3ms/step - loss: 0.1202 - accuracy: 0.9661 - val_loss: 0.0490 - val_accuracy: 0.9868 Epoch 96/100 152/152 [==============================] - 0s 2ms/step - loss: 0.1131 - accuracy: 0.9638 - val_loss: 0.0505 - val_accuracy: 0.9851 Epoch 97/100 152/152 [==============================] - 0s 3ms/step - loss: 0.1187 - accuracy: 0.9615 - val_loss: 0.0474 - val_accuracy: 0.9868 Epoch 98/100 152/152 [==============================] - 0s 3ms/step - loss: 0.1098 - accuracy: 0.9667 - val_loss: 0.0451 - val_accuracy: 0.9876 Epoch 99/100 152/152 [==============================] - 0s 3ms/step - loss: 0.1056 - accuracy: 0.9620 - val_loss: 0.0544 - val_accuracy: 0.9843 Epoch 100/100 152/152 [==============================] - 0s 3ms/step - loss: 0.1238 - accuracy: 0.9593 - val_loss: 0.0448 - val_accuracy: 0.9876
plot(history)
result = CRI_model_eval(model,Xtest_scale,ytest,cri_encoder)
48/48 [==============================] - 0s 2ms/step
Model Evaluation:
Accuracy Score: 98.68 %
Precision Score: 99.23 %
Recall Score: 98.68 %
F1 Score: 98.82 %
Classification Report:
precision recall f1-score support
0 1.00 1.00 1.00 43
1 1.00 1.00 1.00 42
2 1.00 1.00 1.00 38
3 1.00 1.00 1.00 48
4 1.00 0.93 0.96 41
5 1.00 1.00 1.00 39
6 1.00 1.00 1.00 47
7 1.00 1.00 1.00 63
8 1.00 1.00 1.00 47
9 1.00 1.00 1.00 57
10 1.00 1.00 1.00 56
11 1.00 0.98 0.99 46
12 1.00 0.98 0.99 41
13 1.00 1.00 1.00 36
14 1.00 1.00 1.00 42
15 1.00 0.84 0.91 49
16 0.58 1.00 0.74 28
17 1.00 1.00 1.00 34
18 1.00 0.98 0.99 44
19 1.00 1.00 1.00 36
20 1.00 0.92 0.96 48
21 1.00 1.00 1.00 41
22 1.00 1.00 1.00 47
23 1.00 0.98 0.99 51
24 1.00 1.00 1.00 51
25 1.00 1.00 1.00 52
26 1.00 1.00 1.00 61
27 1.00 1.00 1.00 48
28 1.00 1.00 1.00 38
29 1.00 1.00 1.00 43
30 1.00 0.98 0.99 56
31 1.00 1.00 1.00 44
32 1.00 1.00 1.00 55
accuracy 0.99 1512
macro avg 0.99 0.99 0.99 1512
weighted avg 0.99 0.99 0.99 1512
48/48 [==============================] - 0s 1ms/step
LSTM Model
2.C Simple LSTM Classifier for Predicting the Accident Level
# Read the data from the CSV file 'Cleaned Data.csv' using pandas read_csv function
data = pd.read_csv('Cleaned Data.csv')
# Extract the input features (X) and target labels (y) from the data
# 'Cleaned_Description' column contains the input features (X)
# 'Accident Level' column contains the target labels (y)
X = data['Cleaned_Description']
y = data['Accident Level']
# The first step in word embeddings is to convert the words into thier corresponding numeric indexes.
tokenizer = Tokenizer(num_words=1000)
tokenizer.fit_on_texts(X)
X_token = tokenizer.texts_to_sequences(X)
# Sentences can have different lengths, and therefore the sequences returned by the Tokenizer class also consist of variable lengths.
# We need to pad the our sequences using the max length.
vocab_size = len(tokenizer.word_index) + 1
print("vocab_size:", vocab_size)
maxlen = 200
Xt_voc = pad_sequences(X_token, padding='post', maxlen=maxlen)
vocab_size: 3008
from imblearn.over_sampling import RandomOverSampler
# Oversample the data using RandomOverSampler to address class imbalance
X, y = RandomOverSampler().fit_resample(Xt_voc, y)
# Split the data into training and testing sets with a test_size of 15%
Xtrain_acc, Xtest_acc, ytrain_acc, ytest_acc = train_test_split(X, y, test_size=0.15, random_state=24)
# Further split the training data into training and validation sets with a validation size of 15%
Xtrain_acc, Xval_acc, ytrain_acc, yval_acc = train_test_split(Xtrain_acc, ytrain_acc, test_size=0.15, random_state=124)
# Convert integer target labels to one-hot encoded format
ytrain_acc_label = to_categorical(ytrain_acc)
yval_acc_label = to_categorical(yval_acc)
# We need to load the built-in GloVe word embeddings
embedding_size = 200
embeddings_dictionary = dict()
glove_file = open('glove.6B.200d.txt', encoding="utf8")
for line in glove_file:
records = line.split()
word = records[0]
vector_dimensions = np.asarray(records[1:], dtype='float32')
embeddings_dictionary[word] = vector_dimensions
glove_file.close()
embedding_matrix = np.zeros((vocab_size, embedding_size))
for word, index in tokenizer.word_index.items():
embedding_vector = embeddings_dictionary.get(word)
if embedding_vector is not None:
embedding_matrix[index] = embedding_vector
inputs = Input(shape=(Xtrain_acc.shape[1],))
# Word Embedding Layer
embedding = Embedding(vocab_size, embedding_size, weights=[embedding_matrix], trainable=False)(inputs)
# LSTM Layer
LSTM1 = LSTM(128, return_sequences=True)(embedding)
# Global Max Pooling Layer
pooling_1 = GlobalMaxPool1D()(LSTM1)
# Dropout Layer
drop_out1 = Dropout(0.5, input_shape=(256,))(pooling_1)
# Dense Layer 1
dense1 = Dense(128, activation='relu')(drop_out1)
# Dropout Layer
drop_out2 = Dropout(0.5, input_shape=(128,))(dense1)
# Output Dense Layer with Softmax Activation
dense2 = Dense(5, activation='softmax')(drop_out2)
# Model Compilation
model = Model(inputs=inputs, outputs=dense2)
print(model.summary())
Model: "model_5"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_8 (InputLayer) [(None, 200)] 0
embedding_5 (Embedding) (None, 200, 200) 601600
lstm_5 (LSTM) (None, 200, 128) 168448
global_max_pooling1d_5 (Glo (None, 128) 0
balMaxPooling1D)
dropout_21 (Dropout) (None, 128) 0
dense_29 (Dense) (None, 128) 16512
dropout_22 (Dropout) (None, 128) 0
dense_30 (Dense) (None, 5) 645
=================================================================
Total params: 787,205
Trainable params: 185,605
Non-trainable params: 601,600
_________________________________________________________________
None
# Compile the model with the specified loss function, optimizer, and metrics
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# Use earlystopping
# callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=5, min_delta=0.001)
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=7, min_delta=1E-3)
rlrp = ReduceLROnPlateau(monitor='val_loss', factor=0.0001, patience=5, min_delta=1E-4)
# Start the model training process using the fit() function
# Xtrain_acc: Training input features
# ytrain_acc_label: One-hot encoded target labels for the training data
# epochs: Number of epochs for training
# validation_data: Validation data to evaluate the model's performance after each epoch
# callbacks: List of callbacks to monitor the training process and make adjustments
history = model.fit(Xtrain_acc, ytrain_acc_label, epochs=100, validation_data=(Xval_acc, yval_acc_label), callbacks=[callback, rlrp])
model.save('accidentlevel.h5',history) #saving the model
Epoch 1/100 35/35 [==============================] - 8s 164ms/step - loss: 1.5625 - accuracy: 0.2796 - val_loss: 1.3396 - val_accuracy: 0.5939 - lr: 0.0010 Epoch 2/100 35/35 [==============================] - 5s 150ms/step - loss: 1.2471 - accuracy: 0.4803 - val_loss: 0.9768 - val_accuracy: 0.6599 - lr: 0.0010 Epoch 3/100 35/35 [==============================] - 5s 156ms/step - loss: 0.9503 - accuracy: 0.6084 - val_loss: 0.6616 - val_accuracy: 0.8528 - lr: 0.0010 Epoch 4/100 35/35 [==============================] - 5s 148ms/step - loss: 0.7020 - accuracy: 0.7159 - val_loss: 0.5082 - val_accuracy: 0.8376 - lr: 0.0010 Epoch 5/100 35/35 [==============================] - 5s 145ms/step - loss: 0.5850 - accuracy: 0.7527 - val_loss: 0.3952 - val_accuracy: 0.8985 - lr: 0.0010 Epoch 6/100 35/35 [==============================] - 5s 144ms/step - loss: 0.4220 - accuracy: 0.8315 - val_loss: 0.3141 - val_accuracy: 0.9036 - lr: 0.0010 Epoch 7/100 35/35 [==============================] - 5s 156ms/step - loss: 0.3668 - accuracy: 0.8602 - val_loss: 0.2008 - val_accuracy: 0.9492 - lr: 0.0010 Epoch 8/100 35/35 [==============================] - 6s 166ms/step - loss: 0.2559 - accuracy: 0.9086 - val_loss: 0.1689 - val_accuracy: 0.9543 - lr: 0.0010 Epoch 9/100 35/35 [==============================] - 7s 187ms/step - loss: 0.2550 - accuracy: 0.9086 - val_loss: 0.2328 - val_accuracy: 0.9086 - lr: 0.0010 Epoch 10/100 35/35 [==============================] - 6s 177ms/step - loss: 0.2138 - accuracy: 0.9211 - val_loss: 0.1371 - val_accuracy: 0.9746 - lr: 0.0010 Epoch 11/100 35/35 [==============================] - 6s 160ms/step - loss: 0.1311 - accuracy: 0.9642 - val_loss: 0.2577 - val_accuracy: 0.9442 - lr: 0.0010 Epoch 12/100 35/35 [==============================] - 6s 164ms/step - loss: 0.1418 - accuracy: 0.9543 - val_loss: 0.9503 - val_accuracy: 0.7665 - lr: 0.0010 Epoch 13/100 35/35 [==============================] - 6s 168ms/step - loss: 0.3485 - accuracy: 0.9086 - val_loss: 0.1218 - val_accuracy: 0.9746 - lr: 0.0010 Epoch 14/100 35/35 [==============================] - 6s 160ms/step - loss: 0.2065 - accuracy: 0.9364 - val_loss: 0.1294 - val_accuracy: 0.9695 - lr: 0.0010 Epoch 15/100 35/35 [==============================] - 6s 165ms/step - loss: 0.1219 - accuracy: 0.9713 - val_loss: 0.1194 - val_accuracy: 0.9645 - lr: 0.0010 Epoch 16/100 35/35 [==============================] - 6s 160ms/step - loss: 0.0819 - accuracy: 0.9830 - val_loss: 0.0514 - val_accuracy: 0.9898 - lr: 0.0010 Epoch 17/100 35/35 [==============================] - 6s 162ms/step - loss: 0.0693 - accuracy: 0.9857 - val_loss: 0.0646 - val_accuracy: 0.9848 - lr: 0.0010 Epoch 18/100 35/35 [==============================] - 6s 162ms/step - loss: 0.0624 - accuracy: 0.9794 - val_loss: 0.1502 - val_accuracy: 0.9543 - lr: 0.0010 Epoch 19/100 35/35 [==============================] - 6s 181ms/step - loss: 0.0819 - accuracy: 0.9776 - val_loss: 0.4272 - val_accuracy: 0.8782 - lr: 0.0010 Epoch 20/100 35/35 [==============================] - 6s 176ms/step - loss: 0.1842 - accuracy: 0.9400 - val_loss: 0.1452 - val_accuracy: 0.9391 - lr: 0.0010 Epoch 21/100 35/35 [==============================] - 6s 160ms/step - loss: 0.0927 - accuracy: 0.9713 - val_loss: 0.1862 - val_accuracy: 0.9594 - lr: 0.0010 Epoch 22/100 35/35 [==============================] - 6s 167ms/step - loss: 0.1300 - accuracy: 0.9570 - val_loss: 0.1858 - val_accuracy: 0.9594 - lr: 1.0000e-07 Epoch 23/100 35/35 [==============================] - 5s 157ms/step - loss: 0.1099 - accuracy: 0.9686 - val_loss: 0.1854 - val_accuracy: 0.9594 - lr: 1.0000e-07 Epoch 24/100 35/35 [==============================] - 6s 167ms/step - loss: 0.1126 - accuracy: 0.9615 - val_loss: 0.1850 - val_accuracy: 0.9594 - lr: 1.0000e-07 Epoch 25/100 35/35 [==============================] - 6s 165ms/step - loss: 0.1138 - accuracy: 0.9624 - val_loss: 0.1845 - val_accuracy: 0.9594 - lr: 1.0000e-07
plot(history)
# Evaluate the model's performance on the test dataset using the ACC_model_eval function
result = ACC_model_eval(model, Xtest_acc, ytest_acc)
8/8 [==============================] - 1s 51ms/step
Model Evaluation:
Accuracy Score: 96.98 %
Precision Score: 97.41 %
Recall Score: 96.98 %
F1 Score: 97.05 %
Classification Report:
precision recall f1-score support
0 0.84 1.00 0.91 31
1 1.00 0.95 0.97 61
2 0.98 1.00 0.99 45
3 1.00 0.92 0.96 50
4 1.00 1.00 1.00 45
accuracy 0.97 232
macro avg 0.96 0.97 0.97 232
weighted avg 0.97 0.97 0.97 232
Confusion Matrix:
2.D Simple LSTM Classifier for Predicting the Crictical Risk
data = pd.read_csv('Cleaned Data.csv')
X = data['Cleaned_Description']
cri_encoder = LabelEncoder()
y = cri_encoder.fit_transform(critical_risk)
meta = data['Accident Level'].to_numpy()
# The first step in word embeddings is to convert the words into thier corresponding numeric indexes.
tokenizer = Tokenizer(num_words=1000)
tokenizer.fit_on_texts(X)
X = tokenizer.texts_to_sequences(X)
len(X)
418
# Sentences can have different lengths, and therefore the sequences returned by the Tokenizer class also consist of variable lengths.
# We need to pad the our sequences using the max length.
vocab_size = len(tokenizer.word_index) + 1
print("vocab_size:", vocab_size)
maxlen = 200
X = pad_sequences(X, padding='post', maxlen=maxlen)
len(X)
vocab_size: 3008
418
X_new = np.column_stack((X,meta))
X,y = RandomOverSampler().fit_resample(X_new,y)
Xtrain_cri,Xtest_cri,ytrain_cri,ytest_cri = train_test_split(X,y,test_size=0.15,random_state=24)
Xtrain_cri,Xval_cri,ytrain_cri,yval_cri = train_test_split(Xtrain_cri,ytrain_cri,test_size=0.15,random_state=124)
ytrain_label = to_categorical(ytrain_cri)
yval_label = to_categorical(yval_cri)
# Building a Simple LSTM model
# Define the input layer for the sequential input data (Xtrain_cri)
inputs = Input(shape=(Xtrain_cri.shape[1],))
# Define the input layer for the meta information (5 additional features)
meta_input = Input(shape=(5,))
# Create an embedding layer to convert words to dense vectors
embedding = Embedding(vocab_size, embedding_size, weights=[embedding_matrix], trainable=False)(inputs)
# Add an LSTM layer with 128 units and return the sequences
LSTM1 = LSTM(128, return_sequences=True)(embedding)
# Apply global max pooling over the LSTM output sequence
pooling_1 = GlobalMaxPool1D()(LSTM1)
# Apply dropout to regularize the model
drop_out1 = Dropout(0.5, input_shape=(256,))(pooling_1)
# Add a dense layer with 128 neurons and ReLU activation
dense1 = Dense(128, activation='relu')(drop_out1)
# Apply dropout again to further regularize the model
drop_out2 = Dropout(0.5, input_shape=(128,))(dense1)
# Add the output dense layer with softmax activation for multi-class classification
dense2 = Dense(ytrain_label.shape[1], activation='softmax')(drop_out2)
# Create the final model with input and output layers
model = Model(inputs=inputs, outputs=dense2)
print(model.summary())
Model: "model_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_2 (InputLayer) [(None, 201)] 0
embedding_1 (Embedding) (None, 201, 200) 601600
lstm_1 (LSTM) (None, 201, 128) 168448
global_max_pooling1d_1 (Glo (None, 128) 0
balMaxPooling1D)
dropout_10 (Dropout) (None, 128) 0
dense_18 (Dense) (None, 128) 16512
dropout_11 (Dropout) (None, 128) 0
dense_19 (Dense) (None, 33) 4257
=================================================================
Total params: 790,817
Trainable params: 189,217
Non-trainable params: 601,600
_________________________________________________________________
None
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# Use earlystopping
# callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=5, min_delta=0.001)
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=7, min_delta=1E-3)
rlrp = ReduceLROnPlateau(monitor='val_loss', factor=0.0001, patience=5, min_delta=1E-4)
history = model.fit(Xtrain_cri,ytrain_label, epochs=100,validation_data=(Xval_cri, yval_label),callbacks=[callback, rlrp])
Epoch 1/100 171/171 [==============================] - 26s 142ms/step - loss: 2.3938 - accuracy: 0.3942 - val_loss: 0.7762 - val_accuracy: 0.8849 - lr: 0.0010 Epoch 2/100 171/171 [==============================] - 24s 138ms/step - loss: 0.7954 - accuracy: 0.7908 - val_loss: 0.2225 - val_accuracy: 0.9616 - lr: 0.0010 Epoch 3/100 171/171 [==============================] - 26s 154ms/step - loss: 0.4024 - accuracy: 0.8950 - val_loss: 0.1036 - val_accuracy: 0.9834 - lr: 0.0010 Epoch 4/100 171/171 [==============================] - 23s 137ms/step - loss: 0.2243 - accuracy: 0.9461 - val_loss: 0.0838 - val_accuracy: 0.9803 - lr: 0.0010 Epoch 5/100 171/171 [==============================] - 23s 137ms/step - loss: 0.1619 - accuracy: 0.9599 - val_loss: 0.0819 - val_accuracy: 0.9793 - lr: 0.0010 Epoch 6/100 171/171 [==============================] - 23s 137ms/step - loss: 0.1401 - accuracy: 0.9654 - val_loss: 0.0686 - val_accuracy: 0.9844 - lr: 0.0010 Epoch 7/100 171/171 [==============================] - 27s 156ms/step - loss: 0.1236 - accuracy: 0.9705 - val_loss: 0.0666 - val_accuracy: 0.9793 - lr: 0.0010 Epoch 8/100 171/171 [==============================] - 25s 148ms/step - loss: 0.0835 - accuracy: 0.9800 - val_loss: 0.0406 - val_accuracy: 0.9896 - lr: 0.0010 Epoch 9/100 171/171 [==============================] - 23s 137ms/step - loss: 0.0657 - accuracy: 0.9863 - val_loss: 0.0617 - val_accuracy: 0.9824 - lr: 0.0010 Epoch 10/100 171/171 [==============================] - 29s 170ms/step - loss: 0.0660 - accuracy: 0.9844 - val_loss: 0.0548 - val_accuracy: 0.9855 - lr: 0.0010 Epoch 11/100 171/171 [==============================] - 26s 150ms/step - loss: 0.0360 - accuracy: 0.9925 - val_loss: 0.0483 - val_accuracy: 0.9886 - lr: 0.0010 Epoch 12/100 171/171 [==============================] - 27s 158ms/step - loss: 0.0873 - accuracy: 0.9775 - val_loss: 0.0415 - val_accuracy: 0.9896 - lr: 0.0010 Epoch 13/100 171/171 [==============================] - 27s 158ms/step - loss: 0.0696 - accuracy: 0.9824 - val_loss: 0.0448 - val_accuracy: 0.9876 - lr: 0.0010 Epoch 14/100 171/171 [==============================] - 26s 154ms/step - loss: 0.0631 - accuracy: 0.9852 - val_loss: 0.0447 - val_accuracy: 0.9876 - lr: 1.0000e-07 Epoch 15/100 171/171 [==============================] - 27s 156ms/step - loss: 0.0542 - accuracy: 0.9855 - val_loss: 0.0447 - val_accuracy: 0.9876 - lr: 1.0000e-07 Epoch 16/100 171/171 [==============================] - 28s 163ms/step - loss: 0.0572 - accuracy: 0.9855 - val_loss: 0.0447 - val_accuracy: 0.9876 - lr: 1.0000e-07 Epoch 17/100 171/171 [==============================] - 26s 151ms/step - loss: 0.0560 - accuracy: 0.9857 - val_loss: 0.0447 - val_accuracy: 0.9876 - lr: 1.0000e-07 Epoch 18/100 171/171 [==============================] - 25s 148ms/step - loss: 0.0584 - accuracy: 0.9855 - val_loss: 0.0447 - val_accuracy: 0.9876 - lr: 1.0000e-07
plot(history)
result = CRI_model_eval(model,Xtest_cri,ytest_cri,cri_encoder)
36/36 [==============================] - 2s 51ms/step
Model Evaluation:
Accuracy Score: 97.8 %
Precision Score: 98.57 %
Recall Score: 97.8 %
F1 Score: 97.98 %
Classification Report:
precision recall f1-score support
0 1.00 1.00 1.00 27
1 1.00 0.97 0.98 29
2 1.00 1.00 1.00 30
3 1.00 0.89 0.94 46
4 1.00 0.97 0.99 37
5 1.00 1.00 1.00 43
6 1.00 1.00 1.00 31
7 1.00 1.00 1.00 41
8 1.00 1.00 1.00 30
9 1.00 0.91 0.95 45
10 1.00 0.89 0.94 38
11 1.00 1.00 1.00 42
12 1.00 1.00 1.00 34
13 1.00 1.00 1.00 39
14 1.00 1.00 1.00 29
15 1.00 0.83 0.90 23
16 0.59 1.00 0.74 29
17 1.00 1.00 1.00 35
18 1.00 1.00 1.00 31
19 1.00 1.00 1.00 30
20 1.00 0.97 0.99 39
21 1.00 1.00 1.00 33
22 1.00 1.00 1.00 33
23 0.88 1.00 0.94 37
24 1.00 1.00 1.00 34
25 1.00 1.00 1.00 36
26 1.00 1.00 1.00 31
27 1.00 1.00 1.00 33
28 1.00 1.00 1.00 34
29 1.00 1.00 1.00 22
30 1.00 0.95 0.97 41
31 1.00 0.97 0.99 35
32 1.00 0.95 0.97 37
accuracy 0.98 1134
macro avg 0.98 0.98 0.98 1134
weighted avg 0.99 0.98 0.98 1134
36/36 [==============================] - 2s 51ms/step
2.E Bidirectional LSTM for the Classification the Accident Level
# Building Bi-directional LSTM Model
inputs = Input(shape=(Xtrain_acc.shape[1]),)
embedding = Embedding(vocab_size, embedding_size, weights=[embedding_matrix], trainable=False)(inputs)
LSTM1 = Bidirectional(LSTM(256, return_sequences = True))(embedding)
maxpooling = GlobalMaxPool1D()(LSTM1)
drop_out1 = Dropout(0.5, input_shape = (384,))(maxpooling)
dense1 = Dense(128, activation = 'relu')(drop_out1)
drop_out2 = Dropout(0.5, input_shape = (256,))(dense1)
dense2 = Dense(64, activation = 'relu')(drop_out2)
drop_out3 = Dropout(0.5, input_shape = (128,))(dense2)
dense3 = Dense(10, activation = 'relu')(drop_out3)
drop_out4 = Dropout(0.5, input_shape = (64,))(dense3)
dense4 = Dense(5, activation='softmax')(drop_out4)
acc_lvl_classifier = Model(inputs = inputs, outputs= dense4)
opt = SGD(lr=0.01, momentum=0.9)
acc_lvl_classifier.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
print(acc_lvl_classifier.summary())
Model: "model_2"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_4 (InputLayer) [(None, 200)] 0
embedding_2 (Embedding) (None, 200, 200) 601600
bidirectional (Bidirectiona (None, 200, 512) 935936
l)
global_max_pooling1d_2 (Glo (None, 512) 0
balMaxPooling1D)
dropout_12 (Dropout) (None, 512) 0
dense_20 (Dense) (None, 128) 65664
dropout_13 (Dropout) (None, 128) 0
dense_21 (Dense) (None, 64) 8256
dropout_14 (Dropout) (None, 64) 0
dense_22 (Dense) (None, 10) 650
dropout_15 (Dropout) (None, 10) 0
dense_23 (Dense) (None, 5) 55
=================================================================
Total params: 1,612,161
Trainable params: 1,010,561
Non-trainable params: 601,600
_________________________________________________________________
None
# Use earlystopping
# callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=5, min_delta=0.001)
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=7, min_delta=1E-3)
rlrp = ReduceLROnPlateau(monitor='val_loss', factor=0.0001, patience=5, min_delta=1E-4)
history = acc_lvl_classifier.fit(Xtrain_acc, ytrain_acc_label, epochs=100,validation_data=(Xval_acc, yval_acc_label),callbacks=[callback, rlrp])
acc_lvl_classifier.save('accident level.h5',history) #saving the model
Epoch 1/100 35/35 [==============================] - 89s 2s/step - loss: 1.6488 - accuracy: 0.2249 - val_loss: 1.6100 - val_accuracy: 0.1980 - lr: 0.0100 Epoch 2/100 35/35 [==============================] - 83s 2s/step - loss: 1.6166 - accuracy: 0.2357 - val_loss: 1.6097 - val_accuracy: 0.1980 - lr: 0.0100 Epoch 3/100 35/35 [==============================] - 83s 2s/step - loss: 1.6093 - accuracy: 0.2159 - val_loss: 1.6090 - val_accuracy: 0.1980 - lr: 0.0100 Epoch 4/100 35/35 [==============================] - 85s 2s/step - loss: 1.6010 - accuracy: 0.2231 - val_loss: 1.6043 - val_accuracy: 0.1980 - lr: 0.0100 Epoch 5/100 35/35 [==============================] - 85s 2s/step - loss: 1.6015 - accuracy: 0.2276 - val_loss: 1.5920 - val_accuracy: 0.2437 - lr: 0.0100 Epoch 6/100 35/35 [==============================] - 76s 2s/step - loss: 1.6011 - accuracy: 0.2392 - val_loss: 1.5683 - val_accuracy: 0.3959 - lr: 0.0100 Epoch 7/100 35/35 [==============================] - 76s 2s/step - loss: 1.5847 - accuracy: 0.2509 - val_loss: 1.5341 - val_accuracy: 0.3858 - lr: 0.0100 Epoch 8/100 35/35 [==============================] - 77s 2s/step - loss: 1.5443 - accuracy: 0.2751 - val_loss: 1.4876 - val_accuracy: 0.4213 - lr: 0.0100 Epoch 9/100 35/35 [==============================] - 74s 2s/step - loss: 1.4785 - accuracy: 0.2885 - val_loss: 1.3019 - val_accuracy: 0.4264 - lr: 0.0100 Epoch 10/100 35/35 [==============================] - 76s 2s/step - loss: 1.4060 - accuracy: 0.3495 - val_loss: 1.2101 - val_accuracy: 0.4873 - lr: 0.0100 Epoch 11/100 35/35 [==============================] - 76s 2s/step - loss: 1.3287 - accuracy: 0.3719 - val_loss: 1.1393 - val_accuracy: 0.4569 - lr: 0.0100 Epoch 12/100 35/35 [==============================] - 74s 2s/step - loss: 1.2715 - accuracy: 0.4023 - val_loss: 1.0940 - val_accuracy: 0.4365 - lr: 0.0100 Epoch 13/100 35/35 [==============================] - 77s 2s/step - loss: 1.2368 - accuracy: 0.3952 - val_loss: 1.0869 - val_accuracy: 0.4518 - lr: 0.0100 Epoch 14/100 35/35 [==============================] - 77s 2s/step - loss: 1.2145 - accuracy: 0.4095 - val_loss: 1.0997 - val_accuracy: 0.4061 - lr: 0.0100 Epoch 15/100 35/35 [==============================] - 79s 2s/step - loss: 1.1901 - accuracy: 0.4041 - val_loss: 1.0756 - val_accuracy: 0.4822 - lr: 0.0100 Epoch 16/100 35/35 [==============================] - 78s 2s/step - loss: 1.2091 - accuracy: 0.3898 - val_loss: 1.0926 - val_accuracy: 0.4518 - lr: 0.0100 Epoch 17/100 35/35 [==============================] - 81s 2s/step - loss: 1.2260 - accuracy: 0.4301 - val_loss: 1.0534 - val_accuracy: 0.7157 - lr: 0.0100 Epoch 18/100 35/35 [==============================] - 91s 3s/step - loss: 1.2013 - accuracy: 0.4292 - val_loss: 1.0269 - val_accuracy: 0.6396 - lr: 0.0100 Epoch 19/100 35/35 [==============================] - 85s 2s/step - loss: 1.1419 - accuracy: 0.4659 - val_loss: 0.9917 - val_accuracy: 0.6853 - lr: 0.0100 Epoch 20/100 35/35 [==============================] - 81s 2s/step - loss: 1.1250 - accuracy: 0.4633 - val_loss: 0.9605 - val_accuracy: 0.7056 - lr: 0.0100 Epoch 21/100 35/35 [==============================] - 82s 2s/step - loss: 1.0940 - accuracy: 0.4803 - val_loss: 0.9186 - val_accuracy: 0.6650 - lr: 0.0100 Epoch 22/100 35/35 [==============================] - 86s 2s/step - loss: 1.0608 - accuracy: 0.5367 - val_loss: 0.8244 - val_accuracy: 0.7056 - lr: 0.0100 Epoch 23/100 35/35 [==============================] - 85s 2s/step - loss: 1.0420 - accuracy: 0.5385 - val_loss: 0.8942 - val_accuracy: 0.5482 - lr: 0.0100 Epoch 24/100 35/35 [==============================] - 85s 2s/step - loss: 1.0316 - accuracy: 0.5161 - val_loss: 0.7907 - val_accuracy: 0.7665 - lr: 0.0100 Epoch 25/100 35/35 [==============================] - 85s 2s/step - loss: 0.9681 - accuracy: 0.5663 - val_loss: 0.7572 - val_accuracy: 0.6802 - lr: 0.0100 Epoch 26/100 35/35 [==============================] - 85s 2s/step - loss: 0.9518 - accuracy: 0.5565 - val_loss: 0.7272 - val_accuracy: 0.6954 - lr: 0.0100 Epoch 27/100 35/35 [==============================] - 87s 3s/step - loss: 0.9229 - accuracy: 0.5717 - val_loss: 0.7151 - val_accuracy: 0.7208 - lr: 0.0100 Epoch 28/100 35/35 [==============================] - 82s 2s/step - loss: 0.8815 - accuracy: 0.5851 - val_loss: 0.6851 - val_accuracy: 0.7005 - lr: 0.0100 Epoch 29/100 35/35 [==============================] - 80s 2s/step - loss: 0.8844 - accuracy: 0.5941 - val_loss: 0.7908 - val_accuracy: 0.5431 - lr: 0.0100 Epoch 30/100 35/35 [==============================] - 79s 2s/step - loss: 0.9273 - accuracy: 0.5744 - val_loss: 0.7492 - val_accuracy: 0.6853 - lr: 0.0100 Epoch 31/100 35/35 [==============================] - 80s 2s/step - loss: 0.8700 - accuracy: 0.5914 - val_loss: 0.6488 - val_accuracy: 0.7005 - lr: 0.0100 Epoch 32/100 35/35 [==============================] - 80s 2s/step - loss: 0.8699 - accuracy: 0.5968 - val_loss: 0.6461 - val_accuracy: 0.7614 - lr: 0.0100 Epoch 33/100 35/35 [==============================] - 80s 2s/step - loss: 0.8403 - accuracy: 0.6013 - val_loss: 0.6377 - val_accuracy: 0.6802 - lr: 0.0100 Epoch 34/100 35/35 [==============================] - 81s 2s/step - loss: 0.9931 - accuracy: 0.5573 - val_loss: 0.7956 - val_accuracy: 0.5330 - lr: 0.0100 Epoch 35/100 35/35 [==============================] - 81s 2s/step - loss: 1.0431 - accuracy: 0.5170 - val_loss: 0.6768 - val_accuracy: 0.7157 - lr: 0.0100 Epoch 36/100 35/35 [==============================] - 83s 2s/step - loss: 0.8887 - accuracy: 0.6102 - val_loss: 0.6748 - val_accuracy: 0.7360 - lr: 0.0100 Epoch 37/100 35/35 [==============================] - 84s 2s/step - loss: 0.7928 - accuracy: 0.6443 - val_loss: 0.6777 - val_accuracy: 0.6904 - lr: 0.0100 Epoch 38/100 35/35 [==============================] - 83s 2s/step - loss: 0.7866 - accuracy: 0.6317 - val_loss: 0.6026 - val_accuracy: 0.8122 - lr: 0.0100 Epoch 39/100 35/35 [==============================] - 83s 2s/step - loss: 0.7984 - accuracy: 0.6371 - val_loss: 0.5836 - val_accuracy: 0.7563 - lr: 0.0100 Epoch 40/100 35/35 [==============================] - 82s 2s/step - loss: 0.7659 - accuracy: 0.6756 - val_loss: 0.5367 - val_accuracy: 0.8528 - lr: 0.0100 Epoch 41/100 35/35 [==============================] - 82s 2s/step - loss: 0.7288 - accuracy: 0.6864 - val_loss: 0.6113 - val_accuracy: 0.6701 - lr: 0.0100 Epoch 42/100 35/35 [==============================] - 83s 2s/step - loss: 0.7401 - accuracy: 0.6774 - val_loss: 0.5635 - val_accuracy: 0.7462 - lr: 0.0100 Epoch 43/100 35/35 [==============================] - 83s 2s/step - loss: 0.6946 - accuracy: 0.7007 - val_loss: 0.4850 - val_accuracy: 0.8376 - lr: 0.0100 Epoch 44/100 35/35 [==============================] - 82s 2s/step - loss: 0.6399 - accuracy: 0.7321 - val_loss: 0.4574 - val_accuracy: 0.8223 - lr: 0.0100 Epoch 45/100 35/35 [==============================] - 83s 2s/step - loss: 0.6719 - accuracy: 0.7115 - val_loss: 0.5998 - val_accuracy: 0.7970 - lr: 0.0100 Epoch 46/100 35/35 [==============================] - 83s 2s/step - loss: 0.6979 - accuracy: 0.6962 - val_loss: 0.4336 - val_accuracy: 0.9188 - lr: 0.0100 Epoch 47/100 35/35 [==============================] - 83s 2s/step - loss: 0.6556 - accuracy: 0.7339 - val_loss: 0.4453 - val_accuracy: 0.7919 - lr: 0.0100 Epoch 48/100 35/35 [==============================] - 83s 2s/step - loss: 0.6023 - accuracy: 0.7518 - val_loss: 0.4152 - val_accuracy: 0.8883 - lr: 0.0100 Epoch 49/100 35/35 [==============================] - 83s 2s/step - loss: 0.5988 - accuracy: 0.7634 - val_loss: 0.3613 - val_accuracy: 0.8985 - lr: 0.0100 Epoch 50/100 35/35 [==============================] - 82s 2s/step - loss: 0.6464 - accuracy: 0.7509 - val_loss: 0.4004 - val_accuracy: 0.9036 - lr: 0.0100 Epoch 51/100 35/35 [==============================] - 83s 2s/step - loss: 0.5541 - accuracy: 0.7885 - val_loss: 0.3558 - val_accuracy: 0.9188 - lr: 0.0100 Epoch 52/100 35/35 [==============================] - 84s 2s/step - loss: 0.5562 - accuracy: 0.7751 - val_loss: 0.4521 - val_accuracy: 0.7310 - lr: 0.0100 Epoch 53/100 35/35 [==============================] - 83s 2s/step - loss: 0.5738 - accuracy: 0.7527 - val_loss: 0.3743 - val_accuracy: 0.8629 - lr: 0.0100 Epoch 54/100 35/35 [==============================] - 82s 2s/step - loss: 0.5559 - accuracy: 0.7697 - val_loss: 0.3479 - val_accuracy: 0.9137 - lr: 0.0100 Epoch 55/100 35/35 [==============================] - 83s 2s/step - loss: 0.5898 - accuracy: 0.7545 - val_loss: 0.3756 - val_accuracy: 0.8782 - lr: 0.0100 Epoch 56/100 35/35 [==============================] - 83s 2s/step - loss: 0.5588 - accuracy: 0.7473 - val_loss: 0.3496 - val_accuracy: 0.8528 - lr: 0.0100 Epoch 57/100 35/35 [==============================] - 83s 2s/step - loss: 0.5328 - accuracy: 0.7805 - val_loss: 0.3178 - val_accuracy: 0.9340 - lr: 0.0100 Epoch 58/100 35/35 [==============================] - 83s 2s/step - loss: 0.5442 - accuracy: 0.7661 - val_loss: 0.3319 - val_accuracy: 0.8731 - lr: 0.0100 Epoch 59/100 35/35 [==============================] - 83s 2s/step - loss: 0.5188 - accuracy: 0.7921 - val_loss: 0.3041 - val_accuracy: 0.9340 - lr: 0.0100 Epoch 60/100 35/35 [==============================] - 83s 2s/step - loss: 0.4970 - accuracy: 0.7957 - val_loss: 0.3163 - val_accuracy: 0.9289 - lr: 0.0100 Epoch 61/100 35/35 [==============================] - 83s 2s/step - loss: 0.5036 - accuracy: 0.7975 - val_loss: 0.3653 - val_accuracy: 0.8629 - lr: 0.0100 Epoch 62/100 35/35 [==============================] - 83s 2s/step - loss: 0.5137 - accuracy: 0.7966 - val_loss: 0.3021 - val_accuracy: 0.9340 - lr: 0.0100 Epoch 63/100 35/35 [==============================] - 83s 2s/step - loss: 0.4842 - accuracy: 0.7993 - val_loss: 0.3249 - val_accuracy: 0.8477 - lr: 0.0100 Epoch 64/100 35/35 [==============================] - 83s 2s/step - loss: 0.5061 - accuracy: 0.7796 - val_loss: 0.3130 - val_accuracy: 0.9340 - lr: 0.0100 Epoch 65/100 35/35 [==============================] - 83s 2s/step - loss: 0.4828 - accuracy: 0.8145 - val_loss: 0.2722 - val_accuracy: 0.9543 - lr: 0.0100 Epoch 66/100 35/35 [==============================] - 82s 2s/step - loss: 0.4641 - accuracy: 0.8100 - val_loss: 0.2622 - val_accuracy: 0.9442 - lr: 0.0100 Epoch 67/100 35/35 [==============================] - 82s 2s/step - loss: 0.4678 - accuracy: 0.8047 - val_loss: 0.3936 - val_accuracy: 0.7614 - lr: 0.0100 Epoch 68/100 35/35 [==============================] - 83s 2s/step - loss: 0.5082 - accuracy: 0.7876 - val_loss: 0.3915 - val_accuracy: 0.9036 - lr: 0.0100 Epoch 69/100 35/35 [==============================] - 83s 2s/step - loss: 0.5641 - accuracy: 0.7715 - val_loss: 0.3257 - val_accuracy: 0.9340 - lr: 0.0100 Epoch 70/100 35/35 [==============================] - 82s 2s/step - loss: 0.4682 - accuracy: 0.8011 - val_loss: 0.3198 - val_accuracy: 0.9543 - lr: 0.0100 Epoch 71/100 35/35 [==============================] - 83s 2s/step - loss: 0.4777 - accuracy: 0.8091 - val_loss: 0.2870 - val_accuracy: 0.9594 - lr: 0.0100 Epoch 72/100 35/35 [==============================] - 83s 2s/step - loss: 0.6808 - accuracy: 0.7195 - val_loss: 0.5257 - val_accuracy: 0.7005 - lr: 1.0000e-06 Epoch 73/100 35/35 [==============================] - 82s 2s/step - loss: 0.7545 - accuracy: 0.6577 - val_loss: 0.5364 - val_accuracy: 0.6853 - lr: 1.0000e-06
plot(history)
result = ACC_model_eval(acc_lvl_classifier,Xtest_acc,ytest_acc)
8/8 [==============================] - 3s 262ms/step
Model Evaluation:
Accuracy Score: 70.26 %
Precision Score: 91.89 %
Recall Score: 70.26 %
F1 Score: 75.23 %
Classification Report:
precision recall f1-score support
0 1.00 0.39 0.56 96
1 0.00 0.00 0.00 10
2 0.78 1.00 0.88 36
3 0.98 1.00 0.99 45
4 1.00 1.00 1.00 45
accuracy 0.70 232
macro avg 0.75 0.68 0.68 232
weighted avg 0.92 0.70 0.75 232
Confusion Matrix:
2.F Bidirectional LSTM for the Classification the Critical risk
# Building Bi-directional LSTM Model
inputs = Input(shape=(Xtrain_cri.shape[1],))
meta = Input(shape=(1,))
embedding = Embedding(vocab_size, embedding_size, weights=[embedding_matrix], trainable=False)(inputs)
LSTM1 = Bidirectional(LSTM(256, return_sequences = True))(embedding)
maxpooling = GlobalMaxPool1D()(LSTM1)
drop_out1 = Dropout(0.5, input_shape = (384,))(maxpooling)
dense1 = Dense(128, activation = 'relu')(drop_out1)
drop_out2 = Dropout(0.5, input_shape = (256,))(dense1)
dense2 = Dense(64, activation = 'relu')(drop_out2)
drop_out3 = Dropout(0.5, input_shape = (128,))(dense2)
dense3 = Dense(33, activation='softmax')(drop_out3)
opt = SGD(lr=0.01, momentum=0.9)
cri_lvl_classifier = Model(inputs =inputs, outputs= dense3)
cri_lvl_classifier.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
print(cri_lvl_classifier.summary())
Model: "model_3"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_5 (InputLayer) [(None, 201)] 0
embedding_3 (Embedding) (None, 201, 200) 601600
bidirectional_1 (Bidirectio (None, 201, 512) 935936
nal)
global_max_pooling1d_3 (Glo (None, 512) 0
balMaxPooling1D)
dropout_16 (Dropout) (None, 512) 0
dense_24 (Dense) (None, 128) 65664
dropout_17 (Dropout) (None, 128) 0
dense_25 (Dense) (None, 64) 8256
dropout_18 (Dropout) (None, 64) 0
dense_26 (Dense) (None, 33) 2145
=================================================================
Total params: 1,613,601
Trainable params: 1,012,001
Non-trainable params: 601,600
_________________________________________________________________
None
history = cri_lvl_classifier.fit(Xtrain_cri,ytrain_label, epochs=100,validation_data=(Xval_cri, yval_label),callbacks=[callback, rlrp],batch_size=10, verbose=1)
cri_lvl_classifier.save('critical.h5',history) #saving the model
Epoch 1/100 546/546 [==============================] - 244s 443ms/step - loss: 3.0507 - accuracy: 0.1698 - val_loss: 1.6768 - val_accuracy: 0.5716 - lr: 0.0100 Epoch 2/100 546/546 [==============================] - 227s 415ms/step - loss: 1.5621 - accuracy: 0.5373 - val_loss: 0.6047 - val_accuracy: 0.8662 - lr: 0.0100 Epoch 3/100 546/546 [==============================] - 235s 430ms/step - loss: 0.9335 - accuracy: 0.6996 - val_loss: 0.2753 - val_accuracy: 0.9253 - lr: 0.0100 Epoch 4/100 546/546 [==============================] - 230s 422ms/step - loss: 0.6301 - accuracy: 0.7914 - val_loss: 0.1904 - val_accuracy: 0.9378 - lr: 0.0100 Epoch 5/100 546/546 [==============================] - 230s 421ms/step - loss: 0.4996 - accuracy: 0.8340 - val_loss: 0.1432 - val_accuracy: 0.9471 - lr: 0.0100 Epoch 6/100 546/546 [==============================] - 231s 424ms/step - loss: 0.3819 - accuracy: 0.8725 - val_loss: 0.0869 - val_accuracy: 0.9772 - lr: 0.0100 Epoch 7/100 546/546 [==============================] - 232s 425ms/step - loss: 0.3338 - accuracy: 0.8877 - val_loss: 0.0920 - val_accuracy: 0.9720 - lr: 0.0100 Epoch 8/100 546/546 [==============================] - 232s 425ms/step - loss: 0.2889 - accuracy: 0.9115 - val_loss: 0.0578 - val_accuracy: 0.9824 - lr: 0.0100 Epoch 9/100 546/546 [==============================] - 233s 426ms/step - loss: 0.2308 - accuracy: 0.9256 - val_loss: 0.0558 - val_accuracy: 0.9834 - lr: 0.0100 Epoch 10/100 546/546 [==============================] - 233s 426ms/step - loss: 0.2162 - accuracy: 0.9304 - val_loss: 0.0399 - val_accuracy: 0.9844 - lr: 0.0100 Epoch 11/100 546/546 [==============================] - 232s 425ms/step - loss: 0.1885 - accuracy: 0.9388 - val_loss: 0.0324 - val_accuracy: 0.9865 - lr: 0.0100 Epoch 12/100 546/546 [==============================] - 232s 425ms/step - loss: 0.1851 - accuracy: 0.9428 - val_loss: 0.0442 - val_accuracy: 0.9855 - lr: 0.0100 Epoch 13/100 546/546 [==============================] - 250s 459ms/step - loss: 0.1613 - accuracy: 0.9485 - val_loss: 0.0349 - val_accuracy: 0.9876 - lr: 0.0100 Epoch 14/100 546/546 [==============================] - 233s 426ms/step - loss: 0.1577 - accuracy: 0.9542 - val_loss: 0.0405 - val_accuracy: 0.9855 - lr: 0.0100 Epoch 15/100 546/546 [==============================] - 240s 439ms/step - loss: 0.1257 - accuracy: 0.9593 - val_loss: 0.0223 - val_accuracy: 0.9938 - lr: 0.0100 Epoch 16/100 546/546 [==============================] - 234s 429ms/step - loss: 0.1162 - accuracy: 0.9621 - val_loss: 0.0317 - val_accuracy: 0.9896 - lr: 0.0100 Epoch 17/100 546/546 [==============================] - 233s 427ms/step - loss: 0.1351 - accuracy: 0.9579 - val_loss: 0.0259 - val_accuracy: 0.9917 - lr: 0.0100 Epoch 18/100 546/546 [==============================] - 235s 430ms/step - loss: 0.1083 - accuracy: 0.9683 - val_loss: 0.0234 - val_accuracy: 0.9927 - lr: 0.0100 Epoch 19/100 546/546 [==============================] - 273s 501ms/step - loss: 0.1076 - accuracy: 0.9657 - val_loss: 0.0173 - val_accuracy: 0.9927 - lr: 0.0100 Epoch 20/100 546/546 [==============================] - 256s 469ms/step - loss: 0.1236 - accuracy: 0.9601 - val_loss: 0.0248 - val_accuracy: 0.9938 - lr: 0.0100 Epoch 21/100 546/546 [==============================] - 234s 429ms/step - loss: 0.1075 - accuracy: 0.9689 - val_loss: 0.0200 - val_accuracy: 0.9927 - lr: 0.0100 Epoch 22/100 546/546 [==============================] - 237s 433ms/step - loss: 0.0851 - accuracy: 0.9756 - val_loss: 0.0199 - val_accuracy: 0.9917 - lr: 0.0100 Epoch 23/100 546/546 [==============================] - 243s 445ms/step - loss: 0.0874 - accuracy: 0.9747 - val_loss: 0.0212 - val_accuracy: 0.9927 - lr: 0.0100 Epoch 24/100 546/546 [==============================] - 243s 445ms/step - loss: 0.0941 - accuracy: 0.9707 - val_loss: 0.0259 - val_accuracy: 0.9896 - lr: 0.0100 Epoch 25/100 546/546 [==============================] - 245s 449ms/step - loss: 0.0899 - accuracy: 0.9711 - val_loss: 0.0286 - val_accuracy: 0.9886 - lr: 1.0000e-06 Epoch 26/100 546/546 [==============================] - 266s 486ms/step - loss: 0.0890 - accuracy: 0.9755 - val_loss: 0.0286 - val_accuracy: 0.9886 - lr: 1.0000e-06 Epoch 27/100 546/546 [==============================] - 277s 508ms/step - loss: 0.0930 - accuracy: 0.9734 - val_loss: 0.0285 - val_accuracy: 0.9886 - lr: 1.0000e-06 Epoch 28/100 546/546 [==============================] - 273s 500ms/step - loss: 0.0858 - accuracy: 0.9740 - val_loss: 0.0285 - val_accuracy: 0.9886 - lr: 1.0000e-06 Epoch 29/100 546/546 [==============================] - 278s 509ms/step - loss: 0.1066 - accuracy: 0.9659 - val_loss: 0.0285 - val_accuracy: 0.9907 - lr: 1.0000e-06
plot(history)
result = CRI_model_eval(cri_lvl_classifier,Xtest_cri,ytest_cri,cri_encoder)
36/36 [==============================] - 13s 319ms/step
Model Evaluation:
Accuracy Score: 98.94 %
Precision Score: 99.1 %
Recall Score: 98.94 %
F1 Score: 98.98 %
Classification Report:
precision recall f1-score support
0 1.00 1.00 1.00 27
1 1.00 1.00 1.00 28
2 1.00 1.00 1.00 30
3 1.00 1.00 1.00 41
4 1.00 1.00 1.00 36
5 1.00 1.00 1.00 43
6 1.00 1.00 1.00 31
7 1.00 1.00 1.00 41
8 1.00 1.00 1.00 30
9 1.00 0.95 0.98 43
10 1.00 0.94 0.97 36
11 1.00 1.00 1.00 42
12 1.00 1.00 1.00 34
13 1.00 1.00 1.00 39
14 1.00 1.00 1.00 29
15 0.89 1.00 0.94 17
16 0.80 0.95 0.87 41
17 1.00 1.00 1.00 35
18 1.00 1.00 1.00 31
19 1.00 1.00 1.00 30
20 1.00 0.93 0.96 41
21 1.00 1.00 1.00 33
22 1.00 1.00 1.00 33
23 1.00 1.00 1.00 42
24 1.00 1.00 1.00 34
25 1.00 1.00 1.00 36
26 1.00 1.00 1.00 31
27 1.00 1.00 1.00 33
28 1.00 1.00 1.00 34
29 1.00 1.00 1.00 22
30 1.00 0.95 0.97 41
31 1.00 1.00 1.00 34
32 1.00 0.97 0.99 36
accuracy 0.99 1134
macro avg 0.99 0.99 0.99 1134
weighted avg 0.99 0.99 0.99 1134
36/36 [==============================] - 12s 345ms/step
Chatbot
# Set the maximum sequence length to 200
maxlen = 200
# Load the accident prediction model from 'accident level.h5' file
accident = load_model('accidentlevel.h5')
# Load the criticality classification model from 'critical.h5' file
critical = load_model('critical.h5')
def clean_up(sentence):
# Convert the sentence to lowercase
sentence_words = sentence.lower()
# Remove leading and trailing whitespaces
sentence_words = sentence_words.strip()
# Replace all non-alphanumeric characters with spaces
sentence_words = re.sub(r'[^a-zA-Z0-9]', ' ', sentence_words)
# Remove any digits from the sentence
sentence_words = re.sub(r'\d+', '', sentence_words)
def remove_stopwords(data):
# Remove stopwords from the sentence
return ' '.join([word for word in data.split() if word not in stopwords.words('english')])
# Call the remove_stopwords function to eliminate common stopwords
sentence_words = remove_stopwords(sentence_words)
# Return the cleaned-up sentence
return sentence_words
def create_data(sentence):
# Clean up the sentence using the clean_up function
sentence_word = clean_up(sentence)
# Split the sentence into lines and store them as elements in a list
lines = sentence_word.strip().split("\n")
elements = [line for line in lines]
# Convert the list of elements into a pandas Series
series = pd.Series(elements)
# Create a tokenizer and fit it on the Series to create word-to-index mappings
tokenizer = Tokenizer(num_words=1000)
tokenizer.fit_on_texts(series)
# Convert the text data into sequences of integers based on the tokenizer's word-to-index mappings
sentence_word = tokenizer.texts_to_sequences(series)
# Pad the sequences to ensure they have the same length (maxlen)
sentence_word = pad_sequences(sentence_word, padding='post', maxlen=maxlen)
# Determine the size of the vocabulary (number of unique words + 1 for the reserved 0 index)
vocab_size = len(tokenizer.word_index) + 1
# Return the padded sequences and the vocabulary size
return sentence_word, vocab_size
def predict_accident_level(sentence, model):
# Create data for prediction using the provided sentence and the model's tokenizer
p, vocab_size = create_data(sentence)
# Make predictions using the model
res = model.predict(p)
# Find the maximum probability and its corresponding index
max_prob = np.max(res[0])
max_prob_index = np.argmax(res)
# Return the index with the highest probability
return np.argmax(res)
def predict_critical_risk(sentence, accident_lvl, model):
# Create data for prediction using the provided sentence and the model's tokenizer
p, vocab_size = create_data(sentence)
# Insert the accident level into the input sequence and reshape it to include the additional dimension
p = np.insert(p, maxlen, accident_lvl).reshape(1, maxlen+1)
# Make predictions using the model
res = model.predict(p)
# Sort the predictions in descending order
res = np.argsort(-res)
# Find the maximum probability
max_prob = np.max(res[0])
# Find the index with the highest probability
max_prob_index = np.argmax(res)
# Return the top 3 predictions
return res[0][:3]
#defining the chatbot function respose
def response(text):
# Predict the accident level for the given text using the 'accident' model
accident_lvl = predict_accident_level(text, accident)
# Predict the critical risk for the given text and the predicted accident level using the 'critical' model
critical_risk = predict_critical_risk(text, accident_lvl, critical)
# Return the accident level and the critical risk predictions
return accident_lvl, critical_risk
# Printing the introduction to the Incident Analyzer
print("Welcome! I am Your AI-Based Incident Analyzer. How may I assist you today?\nEnter your incident:")
# Main loop that runs continuously until the user decides to quit
while True:
text = input("\nYou:") # Getting user input
if text.lower() in ["hi", "hello", "hey"]:
print("Hello, how may I help you?")
elif text.lower() in ['quit', 'bye', 'goodbye', 'get lost', 'see you']:
# If user inputs a quit command, exit the loop and terminate the program
print("Thanks for Using the Incident Analyzer")
break
elif text.lower() in ["accident", "please help", "i need information"]:
print("Please elaborate on your incident information in detail to get proper responses.")
else:
# Call the response function to predict accident level and critical risk
acc, cri = response(text)
# Encode the accident level prediction for better presentation
encode = {0: 'I', 1: 'II', 2: 'III', 3: 'IV', 4: 'V'}
acc = encode[acc]
# Assuming cri_encoder is an instance of some encoder used for critical risk predictions
result = cri_encoder.inverse_transform(cri)
print("\nBot:") # Print the result as bot response
print("Accident Level:", acc)
print("Possible Critical Risk:", result)
Welcome! I am Your AI-Based Incident Analyzer. How may I assist you today? Enter your incident: You:hi Hello, how may I help you? You:accident Please elaborate on your incident information in detail to get proper responses. You:In moments that the truck of transport of personnel of the company MCEISA plate AJG-751, moved from the Nv 3300 to surface, missing 50m to reach the mouth of the Nv 3900, the gearbox does not respond so the driver stops the truck to inspect it along with maintenance personnel who At that time I was traveling in the truck. They raise the cabin to manually put the change in first and continue the trip, culminated this task the driver with the support of the maintenance personnel lower the cabin which due to the weight falls hitting the driver between the cabin and the hopper of the truck. At the time of the accident the employee was wearing his safety helmet and chin strap. 1/1 [==============================] - 0s 284ms/step 1/1 [==============================] - 2s 2s/step Bot: Accident Level: I Possible Critical Risk: ['Others' 'Fall' 'Vehicles and Mobile Equipment'] You:During field trip, to return to work after lunch, the employee of the Wellfield company slipped on the loose stones that were in the place. At that moment, with the intention of balancing, he tried hold onto a tree, falling under the right arm causing fracture at the distal end of the radius. The activity was paralyzed and the employee was referred to the Hospital in Paracatu where he underwent medical care. 1/1 [==============================] - 0s 22ms/step 1/1 [==============================] - 0s 25ms/step Bot: Accident Level: I Possible Critical Risk: ['Others' 'Vehicles and Mobile Equipment' 'Fall'] You:The operator of the Scissor 187 leaves his equipment parked at level -2995 Acc 7 due to electrical problems, when the maintenance personnel arrives, the electrician climbs on the control platform of the equipment and performs the verification of the hydraulic system confirming the problem; then in coordination with the mechanic decide to perform the test with the diesel system, moments in which accidentally activates with the body the arm movement lever causing the drill arm to move downwards generating the left hand's atricion against the support of the pivot tube generating the lesion described. At the time of the accident the electrician was alone on the control platform, while the mechanic was at ground level observing the pressure on the diesel system pressure gauge. 1/1 [==============================] - 0s 34ms/step 1/1 [==============================] - 0s 18ms/step Bot: Accident Level: I Possible Critical Risk: ['Others' 'Fall' 'Vehicles and Mobile Equipment'] You:hand cut, bone frature 1/1 [==============================] - 0s 22ms/step 1/1 [==============================] - 0s 28ms/step Bot: Accident Level: I Possible Critical Risk: ['Cut' 'Others' 'Pressed'] You:eye lost vision due to increase in temperature and concussion 1/1 [==============================] - 0s 19ms/step 1/1 [==============================] - 0s 30ms/step Bot: Accident Level: I Possible Critical Risk: ['Cut' 'Others' 'Manual Tools'] You:quit Thanks for Using the Incident Analyzer
import tkinter as tk
from tkinter import scrolledtext
from tkinter import messagebox
from textblob import TextBlob
accident = load_model('accident level.h5')
critical = load_model('critical.h5')
def clean_up(sentence):
sentence_words = sentence.lower()
sentence_words = sentence_words.strip()
sentence_words = re.sub(r'[^a-zA-Z0-9]', ' ', sentence_words)
sentence_words = re.sub(r'\d+', '', sentence_words)
def remove_stopwords(data):
return ' '.join([word for word in data.split() if word not in stopwords.words('english')])
sentence_words = remove_stopwords(sentence_words)
return sentence_words
def create_data(sentence):
sentence_word = clean_up(sentence)
lines = sentence_word.strip().split("\n")
elements = [line for line in lines]
series = pd.Series(elements)
tokenizer = Tokenizer(num_words=500)
tokenizer.fit_on_texts(series)
sentence_word = tokenizer.texts_to_sequences(series)
sentence_word = pad_sequences(sentence_word, padding='post', maxlen=200)
vocab_size = len(tokenizer.word_index) + 1
return sentence_word,vocab_size
def predict_accident_level(sentence, model):
p, vocab_size = create_data(sentence)
res = model.predict(p)
max_prob = np.max(res[0])
max_prob_index = np.argmax(res[0])
return max_prob_index
def predict_critical_risk(sentence, accident_lvl, model):
p, vocab_size = create_data(sentence)
p = np.insert(p, 200, accident_lvl).reshape(1, 201)
res = model.predict(p)
res = np.argsort(-res)
max_prob = np.max(res[0])
max_prob_index = np.argmax(res[0])
return res[0][:3]
def correct_text(text):
blob = TextBlob(text)
corrected_text = blob.correct()
return str(corrected_text)
def response(text):
text = correct_text(text)
if "hi" in text.lower() or "hello" in text.lower() or "hey" in text.lower():
return "Hello, how may I help you?", None
if "accident" in text.lower() or "please help" in text.lower() or "i need information" in text.lower():
return "Please elaborate on your incident information in detail to get proper responses.", None
accident_lvl = predict_accident_level(text, accident)
if accident_lvl is not None and np.max(accident_lvl) <= 0.21:
return None, None
critical_risk = predict_critical_risk(text, accident_lvl, critical)
return accident_lvl, critical_risk
class ChatBotApp(tk.Tk):
def __init__(self):
super().__init__()
self.title("ChatBot - Incident Analyzer")
self.geometry("400x600")
# Create and place the chat display area
self.chat_display = scrolledtext.ScrolledText(self, wrap=tk.WORD, state=tk.DISABLED, font=("Helvetica", 12))
self.chat_display.grid(row=0, column=0, padx=10, pady=10, columnspan=2, sticky="nsew")
# Create the input area
self.input_entry = tk.Entry(self, font=("Helvetica", 12))
self.input_entry.grid(row=1, column=0, padx=10, pady=10, sticky="ew")
# Create the send button
self.send_button = tk.Button(self, text="Send", command=self.on_send, font=("Helvetica", 12))
self.send_button.grid(row=1, column=1, padx=10, pady=10, sticky="ew")
# Configure the grid to expand with the window size
self.grid_columnconfigure(0, weight=1)
self.grid_rowconfigure(0, weight=1)
# Initialize chat display with WhatsApp-like aesthetics
self.chat_display.configure(bg="#e5dfd6", fg="#000000")
self.chat_display.tag_configure("user", foreground="blue")
self.chat_display.tag_configure("bot", foreground="green")
self.display_bot_message("Welcome! I'm Your AI-Based Incident Analyzer. How May I Assist You Today?")
def on_send(self):
user_message = self.input_entry.get()
if user_message.strip() == "":
messagebox.showinfo("Error", "Please enter a message.")
return
acc, cri = response(user_message)
self.input_entry.delete(0, tk.END)
self.display_user_message(user_message)
if isinstance(acc, str): # Check if acc is a string (i.e., a message)
bot_response = acc # Use the message as bot's response
elif acc is not None:
encode = {0: 'I', 1: 'II', 2: 'III', 3: 'IV', 4: 'V'}
acc = encode[acc]
result = cri_encoder.inverse_transform(cri)
bot_response = f"Accident Level: {acc}\nPossible Critical Risk: {result}\n"
else:
bot_response = "This does not seem to be an accident.\nThank you for using our services. Have a nice day.\n"
self.display_bot_message(bot_response)
def display_user_message(self, message):
self.chat_display.configure(state=tk.NORMAL)
self.chat_display.insert(tk.END, "You: " + message + "\n", "user")
self.chat_display.see(tk.END)
self.chat_display.configure(state=tk.DISABLED)
def display_bot_message(self, message):
self.chat_display.configure(state=tk.NORMAL)
self.chat_display.insert(tk.END, "Bot: " + message + "\n", "bot")
self.chat_display.see(tk.END)
self.chat_display.configure(state=tk.DISABLED)
if __name__ == "__main__":
app = ChatBotApp()
app.mainloop()
1/1 [==============================] - 0s 440ms/step